// DeleteBagFromStaging deletes the bag from the staging area, and releases
// the reserved storage from the volume manager. This deletes both the tarred
// and untarred version of the bag, if they both exist.
func DeleteBagFromStaging(ingestState *models.IngestState, _context *context.Context, activeResult *models.WorkSummary) {
	tarFile := ingestState.IngestManifest.Object.IngestTarFilePath
	if tarFile != "" && fileutil.FileExists(tarFile) {
		_context.MessageLog.Info("Deleting %s", tarFile)
		err := os.Remove(tarFile)
		if err != nil {
			_context.MessageLog.Warning(err.Error())
		}
		err = _context.VolumeClient.Release(tarFile)
		if err != nil {
			_context.MessageLog.Warning(err.Error())
		}
	} else {
		_context.MessageLog.Info("Skipping deletion of %s: file does not exist", tarFile)
	}
	untarredBagPath := ingestState.IngestManifest.Object.IngestUntarredPath
	looksSafeToDelete := fileutil.LooksSafeToDelete(untarredBagPath, 12, 3)
	if fileutil.FileExists(untarredBagPath) && looksSafeToDelete {
		_context.MessageLog.Info("Deleting untarred bag at %s", untarredBagPath)
		err := os.RemoveAll(untarredBagPath)
		if err != nil {
			_context.MessageLog.Warning(err.Error())
		}
		err = _context.VolumeClient.Release(untarredBagPath)
		if err != nil {
			_context.MessageLog.Warning(err.Error())
		}
	} else {
		_context.MessageLog.Info("Skipping deletion of untarred bag dir at %s: "+
			"Directory does not exist, or is unsafe to delete.", untarredBagPath)
	}
}
func TestFileExists(t *testing.T) {
	if !fileutil.FileExists("fileutil_test.go") {
		t.Errorf("FileExists returned false for fileutil_test.go")
	}
	if fileutil.FileExists("NonExistentFile.xyz") {
		t.Errorf("FileExists returned true for NonExistentFile.xyz")
	}
}
// NewBagValidator creates a new BagValidator. Param pathToBag
// should be an absolute path to either the tarred bag (.tar file)
// or to the untarred bag (a directory). Param bagValidationConfig
// defines what we need to validate, in addition to the checksums in the
// manifests.
func NewBagValidator(pathToBag string, bagValidationConfig *BagValidationConfig) (*BagValidator, error) {
	if !fileutil.FileExists(pathToBag) {
		return nil, fmt.Errorf("Bag does not exist at %s", pathToBag)
	}
	if bagValidationConfig == nil {
		return nil, fmt.Errorf("Param bagValidationConfig cannot be nil")
	}
	configErrors := bagValidationConfig.ValidateConfig()
	if len(configErrors) > 0 {
		errString := "BagValidationConfig has the following errors:"
		for _, e := range configErrors {
			errString += fmt.Sprintf("\n%s", e.Error())
		}
		// Use "%s" so that percent signs in the collected error text
		// are not misinterpreted as format verbs.
		return nil, fmt.Errorf("%s", errString)
	}
	err := bagValidationConfig.CompileFileNameRegex()
	if err != nil {
		return nil, fmt.Errorf("Error in BagValidationConfig: %v", err)
	}
	calculateMd5 := util.StringListContains(bagValidationConfig.FixityAlgorithms, constants.AlgMd5)
	calculateSha256 := util.StringListContains(bagValidationConfig.FixityAlgorithms, constants.AlgSha256)
	tagFilesToParse := make([]string, 0)
	for pathToFile, filespec := range bagValidationConfig.FileSpecs {
		if filespec.ParseAsTagFile {
			tagFilesToParse = append(tagFilesToParse, pathToFile)
		}
	}
	bagValidator := &BagValidator{
		PathToBag:           pathToBag,
		BagValidationConfig: bagValidationConfig,
		virtualBag:          models.NewVirtualBag(pathToBag, tagFilesToParse, calculateMd5, calculateSha256),
	}
	return bagValidator, nil
}
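A hedged construction sketch, not from the source: it assumes BagValidationConfig and FileSpec can be built literally with the fields NewBagValidator reads above (FixityAlgorithms, FileSpecs, ParseAsTagFile).

// Hypothetical usage sketch for NewBagValidator. The config literal below
// is an assumption based on the fields the constructor references.
config := &BagValidationConfig{
	FixityAlgorithms: []string{constants.AlgMd5, constants.AlgSha256},
	FileSpecs: map[string]FileSpec{
		"bag-info.txt": FileSpec{ParseAsTagFile: true},
	},
}
validator, err := NewBagValidator("/mnt/staging/example.tar", config)
if err != nil {
	// Reports a missing bag, a nil or invalid config, or a bad filename regex.
	log.Fatal(err)
}
_ = validator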
// Expand the tilde in the data_path setting to an absolute path.
// Returns the expanded path, or an error.
func expandedDataDir(configFile string) (string, error) {
	file, err := os.Open(configFile)
	if err != nil {
		return "", fmt.Errorf("Cannot open config file: %v\n", err)
	}
	defer file.Close()
	bufReader := bufio.NewReader(file)
	for {
		line, err := bufReader.ReadString('\n')
		if err == io.EOF {
			break
		} else if err != nil {
			return "", err
		}
		cleanLine := strings.TrimSpace(line)
		if strings.HasPrefix(cleanLine, "data_path") {
			parts := strings.SplitN(cleanLine, "=", 2)
			if len(parts) < 2 {
				return "", fmt.Errorf("Config file setting for data_path is missing or malformed.")
			}
			expanded, err := fileutil.ExpandTilde(util.CleanString(parts[1]))
			if err != nil {
				return "", fmt.Errorf("Cannot expand data_dir setting '%s': %v", parts[1], err)
			}
			if !fileutil.FileExists(expanded) {
				fmt.Printf("Creating NSQ data directory %s \n", expanded)
				if err := os.MkdirAll(expanded, 0755); err != nil {
					return "", fmt.Errorf("Cannot create NSQ data directory %s: %v", expanded, err)
				}
			}
			return expanded, nil
		}
	}
	return "", nil
}
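For reference, a self-contained sketch of the data_path parsing above. Here expandTilde is a stdlib stand-in for fileutil.ExpandTilde and util.CleanString, neither of which is shown in this section.

package main

import (
	"fmt"
	"os"
	"path/filepath"
	"strings"
)

// expandTilde is a stand-in for fileutil.ExpandTilde; it handles only
// the leading "~/" case.
func expandTilde(p string) (string, error) {
	if strings.HasPrefix(p, "~/") {
		home, err := os.UserHomeDir()
		if err != nil {
			return "", err
		}
		return filepath.Join(home, p[2:]), nil
	}
	return p, nil
}

func main() {
	// A typical nsqd config line that expandedDataDir looks for.
	line := "data_path = ~/nsq/data"
	parts := strings.SplitN(strings.TrimSpace(line), "=", 2)
	expanded, err := expandTilde(strings.TrimSpace(parts[1]))
	if err != nil {
		fmt.Println(err)
		return
	}
	fmt.Println(expanded) // e.g. /home/user/nsq/data
}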
func TestDPNBagInfo(t *testing.T) {
	builder := createBagBuilder(t)
	defer tearDown()
	require.NotNil(t, builder)
	tagfile, err := builder.Bag.TagFile("bag-info.txt")
	require.Nil(t, err)
	require.Empty(t, builder.ErrorMessage)
	require.NotNil(t, tagfile)
	expected := filepath.Join(builder.LocalPath, "bag-info.txt")
	assert.Equal(t, expected, tagfile.Name())
	assert.True(t, fileutil.FileExists(expected))
	verifyTagField(t, tagfile, "Source-Organization", "uc.edu")
	verifyTagField(t, tagfile, "Organization-Address", "")
	verifyTagField(t, tagfile, "Contact-Name", "")
	verifyTagField(t, tagfile, "Contact-Phone", "")
	verifyTagField(t, tagfile, "Contact-Email", "")
	verifyTagField(t, tagfile, "Bagging-Date", builder.BagTime())
	verifyTagField(t, tagfile, "Bag-Size", "686")
	verifyTagField(t, tagfile, "Bag-Group-Identifier", "")
	verifyTagField(t, tagfile, "Bag-Count", "1")

	// Make sure the bagging date was set.
	assert.NotEqual(t, builder.BagTime(), "0001-01-01T00:00:00Z")
}
// Dumps a JSON representation of this object to a file at the specified
// path. This will overwrite the existing file, if the existing file has
// a .json extension. Note that converting the stats object to JSON can
// use a lot of memory, if you're working with a lot of data. This is safe
// for integration testing, and it dumps out human-readable formatted JSON.
// See also APTBucketReaderStatsLoadFromFile.
func (stats *APTBucketReaderStats) DumpToFile(pathToFile string) error {
	// Matches .json, or tempfile with random ending, like .json43272
	fileNameLooksSafe, err := regexp.MatchString("\\.json\\d*$", pathToFile)
	if err != nil {
		return fmt.Errorf("DumpToFile(): path '%s'?? : %v", pathToFile, err)
	}
	if fileutil.FileExists(pathToFile) && !fileNameLooksSafe {
		return fmt.Errorf("DumpToFile() will not overwrite existing file "+
			"'%s' because that might be dangerous. Give your output file a .json "+
			"extension to be safe.", pathToFile)
	}
	jsonData, err := json.MarshalIndent(stats, "", " ")
	if err != nil {
		return err
	}
	outputFile, err := os.Create(pathToFile)
	if err != nil {
		return err
	}
	defer outputFile.Close()
	// Report write failures instead of silently discarding them.
	_, err = outputFile.Write(jsonData)
	return err
}
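The "\.json\d*$" pattern above accepts plain .json files as well as the numeric-suffix names that ioutil.TempFile produces. A small stdlib check confirms the behavior:

package main

import (
	"fmt"
	"regexp"
)

func main() {
	// The same pattern DumpToFile uses to decide whether an
	// existing file may be overwritten.
	pattern := "\\.json\\d*$"
	for _, p := range []string{
		"stats.json",      // plain .json: safe to overwrite
		"stats.json43272", // ioutil.TempFile suffix: safe to overwrite
		"stats.txt",       // anything else: refused if it already exists
	} {
		ok, _ := regexp.MatchString(pattern, p)
		fmt.Printf("%-16s -> %v\n", p, ok)
	}
}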
func TestDPNInfo(t *testing.T) {
	builder := createBagBuilder(t)
	defer tearDown()
	require.NotNil(t, builder)
	tagfile, err := builder.Bag.TagFile("dpn-tags/dpn-info.txt")
	require.Nil(t, err)
	require.Empty(t, builder.ErrorMessage)
	require.NotNil(t, tagfile)
	expected := filepath.Join(builder.LocalPath, "dpn-tags", "dpn-info.txt")
	assert.Equal(t, expected, tagfile.Name())
	assert.True(t, fileutil.FileExists(expected))
	verifyTagField(t, tagfile, "DPN-Object-ID", builder.UUID)
	verifyTagField(t, tagfile, "Local-ID", "uc.edu/cin.675812")
	verifyTagField(t, tagfile, "First-Node-Name", "APTrust")
	verifyTagField(t, tagfile, "First-Node-Address", "160 McCormick Rd., Charlottesville, VA 22904")
	verifyTagField(t, tagfile, "First-Node-Contact-Name", "APTrust Administrator")
	verifyTagField(t, tagfile, "First-Node-Contact-Email", "*****@*****.**")
	verifyTagField(t, tagfile, "Version-Number", "1")
	verifyTagField(t, tagfile, "Previous-Version-Object-ID", "")
	verifyTagField(t, tagfile, "Interpretive-Object-ID", "")
	verifyTagField(t, tagfile, "Rights-Object-ID", "")
	verifyTagField(t, tagfile, "Object-Type", dpn.BAG_TYPE_DATA)
}
func TestBucket_DumpToFile(t *testing.T) {
	_stats := makeAPTBucketReaderStats()
	tempfile, err := ioutil.TempFile("", "apt_bucket_reader_stats_test.json")
	require.Nil(t, err)
	defer os.Remove(tempfile.Name())
	err = _stats.DumpToFile(tempfile.Name())
	require.Nil(t, err)
	assert.True(t, fileutil.FileExists(tempfile.Name()))
	tempFileStat, err := tempfile.Stat()
	require.Nil(t, err)
	assert.True(t, tempFileStat.Size() > 1000)
}
// buildDPNBag builds the DPNBag object that will eventually be entered into
// the DPN registry. This is not the bag itself, just the DPN registry entry.
func (packager *DPNPackager) buildDPNBag(manifest *models.DPNIngestManifest) {
	packager.Context.MessageLog.Info("Building DPN bag record for %s",
		manifest.IntellectualObject.Identifier)
	depositingInstitution := packager.getInstitution(manifest)
	if depositingInstitution == nil {
		return
	}
	manifest.DPNBag = models.NewDPNBag(
		manifest.IntellectualObject.Identifier,
		depositingInstitution.DPNUUID,
		packager.Context.Config.DPN.LocalNode)

	// Calculate the sha256 digest of the tag manifest. This is used for
	// validating bag transfers in DPN. Note that we are NOT using a
	// nonce when we call shaHash.Sum(nil). Though the DPN spec allows
	// us to use a nonce, no nodes are using nonces as of late 2016.
	tagManifestFile := filepath.Join(manifest.LocalDir, "tagmanifest-sha256.txt")
	if !fileutil.FileExists(tagManifestFile) {
		manifest.PackageSummary.AddError("Cannot find tag manifest %s", tagManifestFile)
		return
	}
	reader, err := os.Open(tagManifestFile)
	if err != nil {
		manifest.PackageSummary.AddError("Cannot read tag manifest at %s: %v",
			tagManifestFile, err)
		return
	}
	defer reader.Close()
	shaHash := sha256.New()
	if _, err := io.Copy(shaHash, reader); err != nil {
		manifest.PackageSummary.AddError("Cannot digest tag manifest at %s: %v",
			tagManifestFile, err)
		return
	}
	tagManifestDigest := fmt.Sprintf("%x", shaHash.Sum(nil))

	// Now create the MessageDigest for this bag, with the tag manifest
	// checksum that will be used to verify transfers. When a remote
	// node copies this bag to fulfill a replication request, we expect
	// the node to return this fixity value as proof that it received
	// a valid copy of the bag.
	digest := &models.MessageDigest{
		Bag:       manifest.DPNBag.UUID,
		Algorithm: constants.AlgSha256,
		Node:      packager.Context.Config.DPN.LocalNode,
		Value:     tagManifestDigest,
		CreatedAt: time.Now().UTC(),
	}
	manifest.DPNBag.MessageDigests = append(manifest.DPNBag.MessageDigests, digest)

	// Now that we have a valid DPN bag object, we can name the tar file.
	// According to the DPN spec, the tar file name should be the bag's
	// UUID plus a ".tar" extension.
	parentOfBagDir := filepath.Dir(manifest.LocalDir)
	manifest.LocalTarFile = filepath.Join(parentOfBagDir, manifest.DPNBag.UUID+".tar")
}
// -------------------------------------------------------------------------
// Step 3 of 4: Cleanup (conditional)
//
// cleanup deletes the tar file we just downloaded, if we determine that
// something is wrong with it and there should be no further processing.
// If the bag is valid, we leave it in the staging area. The next process
// (store) will pick it up and copy files to S3 and Glacier.
// -------------------------------------------------------------------------
func (fetcher *APTFetcher) cleanup() {
	for ingestState := range fetcher.CleanupChannel {
		tarFile := ingestState.IngestManifest.Object.IngestTarFilePath
		hasErrors := (ingestState.IngestManifest.FetchResult.HasErrors() ||
			ingestState.IngestManifest.ValidateResult.HasErrors())
		if hasErrors && fileutil.FileExists(tarFile) {
			// Most likely bad md5 digest, but perhaps also a partial download.
			fetcher.Context.MessageLog.Info("Deleting due to download error: %s", tarFile)
			DeleteBagFromStaging(ingestState, fetcher.Context,
				ingestState.IngestManifest.FetchResult)
		}
		fetcher.RecordChannel <- ingestState
	}
}
func TestItemsCopiedToStaging(t *testing.T) {
	// Make sure that each of the expected bags has shown
	// up in our test staging area.
	if !testutil.ShouldRunIntegrationTests() {
		t.Skip("Skipping integration test. Set ENV var RUN_EXCHANGE_INTEGRATION=true if you want to run them.")
	}
	// The context does not change between iterations, so create it once.
	_context, err := testutil.GetContext("integration.json")
	require.Nil(t, err, "Could not create context")
	for i := 2; i <= 5; i++ {
		filename := fmt.Sprintf("00000000-0000-4000-a000-00000000000%d.tar", i)
		path := filepath.Join(_context.Config.DPN.StagingDirectory, filename)
		assert.True(t, fileutil.FileExists(path), "File %s was not copied", path)
	}
}
func TestAPTrustBagit(t *testing.T) {
	builder := createBagBuilder(t)
	defer tearDown()
	require.NotNil(t, builder)
	tagfile, err := builder.Bag.TagFile("aptrust-tags/bagit.txt")
	require.Nil(t, err)
	require.Empty(t, builder.ErrorMessage)
	require.NotNil(t, tagfile)
	expected := filepath.Join(builder.LocalPath, "aptrust-tags", "bagit.txt")
	assert.Equal(t, expected, tagfile.Name())
	assert.True(t, fileutil.FileExists(expected))
	verifyTagField(t, tagfile, "BagIt-Version", "0.97")
	verifyTagField(t, tagfile, "Tag-File-Character-Encoding", "UTF-8")
}
func TestEnsureLogDir(t *testing.T) {
	config := getSimpleDirConfig()
	absLogPath, err := config.EnsureLogDirectory()
	require.Nil(t, err)
	assert.True(t, strings.HasPrefix(absLogPath, "/"))
	assert.True(t, fileutil.FileExists(absLogPath))
	assert.True(t, fileutil.FileExists(config.TarDirectory))
	assert.True(t, fileutil.FileExists(config.LogDirectory))
	assert.True(t, fileutil.FileExists(config.RestoreDirectory))
	assert.True(t, fileutil.FileExists(config.ReplicationDirectory))
	assert.True(t, fileutil.FileExists(config.DPN.LogDirectory))
	assert.True(t, fileutil.FileExists(config.DPN.StagingDirectory))
}
func TestDPNBagit(t *testing.T) {
	builder := createBagBuilder(t)
	defer tearDown()
	require.NotNil(t, builder)
	tagfile, err := builder.Bag.TagFile("bagit.txt")
	require.Nil(t, err)
	require.Empty(t, builder.ErrorMessage)
	require.NotNil(t, tagfile)
	expected := filepath.Join(builder.LocalPath, "bagit.txt")
	assert.Equal(t, expected, tagfile.Name())
	assert.True(t, fileutil.FileExists(expected))
	verifyTagField(t, tagfile, "BagIt-Version", "0.97")
	verifyTagField(t, tagfile, "Tag-File-Character-Encoding", "UTF-8")
}
func (config *Config) createDirectories() error {
	// These four directories are required; the DPN directories below
	// are optional.
	requiredDirs := []struct {
		name string
		path string
	}{
		{"config.TarDirectory", config.TarDirectory},
		{"config.LogDirectory", config.LogDirectory},
		{"config.RestoreDirectory", config.RestoreDirectory},
		{"config.ReplicationDirectory", config.ReplicationDirectory},
	}
	for _, dir := range requiredDirs {
		if dir.path == "" {
			return fmt.Errorf("You must define %s", dir.name)
		}
	}
	for _, dir := range requiredDirs {
		if !fileutil.FileExists(dir.path) {
			if err := os.MkdirAll(dir.path, 0755); err != nil {
				return err
			}
		}
	}
	// TODO: Test these two
	if config.DPN.LogDirectory != "" && !fileutil.FileExists(config.DPN.LogDirectory) {
		if err := os.MkdirAll(config.DPN.LogDirectory, 0755); err != nil {
			return err
		}
	}
	if config.DPN.StagingDirectory != "" && !fileutil.FileExists(config.DPN.StagingDirectory) {
		if err := os.MkdirAll(config.DPN.StagingDirectory, 0755); err != nil {
			return err
		}
	}
	return nil
}
func TestInitLogger(t *testing.T) {
	config := getLoggingTestConfig(t)
	defer teardownLoggerTest(config)
	log, filename := logger.InitLogger(config)
	log.Error("Test Message")
	logFile := filepath.Join(config.AbsLogDirectory(), path.Base(os.Args[0])+".log")
	if !fileutil.FileExists(logFile) {
		t.Errorf("Log file does not exist at %s", logFile)
	}
	if filename != logFile {
		t.Errorf("Expected log file path '%s', got '%s'", logFile, filename)
	}
	data, err := ioutil.ReadFile(logFile)
	if err != nil {
		t.Error(err)
	}
	if !strings.HasSuffix(string(data), "Test Message\n") {
		t.Error("Expected message was not in the message log.")
	}
}
func TestInitJsonLogger(t *testing.T) {
	config := getLoggingTestConfig(t)
	defer teardownLoggerTest(config)
	log, filename := logger.InitJsonLogger(config)
	log.Println("{a:100}")
	logFile := filepath.Join(config.AbsLogDirectory(), path.Base(os.Args[0])+".json")
	if !fileutil.FileExists(logFile) {
		t.Errorf("Log file does not exist at %s", logFile)
	}
	if filename != logFile {
		t.Errorf("Expected log file path '%s', got '%s'", logFile, filename)
	}
	data, err := ioutil.ReadFile(logFile)
	if err != nil {
		t.Error(err)
	}
	if string(data) != "{a:100}\n" {
		t.Error("Expected message was not in the json log.")
	}
}
// Returns true if we can skip fetch and validate. We can skip those
// steps if on a previous run we validated the bag, and it's still
// there in our working directory. This anticipates the case where
// we did those steps but were not able to update the WorkItem record
// in Pharos at the end of the fetch/validate process.
func (fetcher *APTFetcher) canSkipFetchAndValidate(ingestState *models.IngestState) bool {
	return (ingestState.WorkItem.Stage == constants.StageValidate &&
		ingestState.IngestManifest.ValidateResult.Finished() &&
		!ingestState.IngestManifest.HasFatalErrors() &&
		fileutil.FileExists(ingestState.IngestManifest.Object.IngestTarFilePath))
}