Example #1
0
// saveGenericFiles walks the GenericFiles of the object being ingested and
// hands the ones that need saving to createGenericFiles (files with no Id and
// no previous version) or updateGenericFiles (files with a previous version
// and an Id), in batches. Pending batches are sent whenever the loop index
// reaches a multiple of GENERIC_FILE_BATCH_SIZE, and once more after the loop.
//
// Nothing is returned; problems are reported through
// ingestState.IngestManifest.RecordResult. As soon as that result has errors,
// no further create or update call is made. Every flush, including the final
// one, follows the same rule: creates go first, and updates are skipped if
// the result has errors.
func (recorder *APTRecorder) saveGenericFiles(ingestState *models.IngestState) {
	filesToCreate := make([]*models.GenericFile, 0)
	filesToUpdate := make([]*models.GenericFile, 0)

	hasErrors := func() bool {
		return ingestState.IngestManifest.RecordResult.HasErrors()
	}

	// flush sends the pending batches and reports whether the record result
	// is still free of errors afterwards. It sends nothing once an error has
	// been recorded, and it does not send updates after a failed create.
	flush := func() bool {
		if hasErrors() {
			return false
		}
		recorder.createGenericFiles(ingestState, filesToCreate)
		if hasErrors() {
			return false
		}
		recorder.updateGenericFiles(ingestState, filesToUpdate)
		return !hasErrors()
	}

	for i, gf := range ingestState.IngestManifest.Object.GenericFiles {
		// We run this check here, rather than in the validator,
		// because this is an APTrust-specific policy.
		if !util.HasSavableName(gf.OriginalPath()) {
			recorder.Context.MessageLog.Info("Will not save %s: does not match savable name pattern.",
				gf.Identifier)
			gf.IngestNeedsSave = false
		}

		// Batch boundary: send what has accumulated so far and start over
		// with fresh slices, so the helpers never see a slice that is
		// modified after it was handed to them.
		if i%GENERIC_FILE_BATCH_SIZE == 0 {
			if !flush() {
				return
			}
			filesToCreate = make([]*models.GenericFile, 0)
			filesToUpdate = make([]*models.GenericFile, 0)
		}

		if !gf.IngestNeedsSave {
			continue
		}
		switch {
		case gf.IngestPreviousVersionExists && gf.Id > 0:
			filesToUpdate = append(filesToUpdate, gf)
		case gf.IngestPreviousVersionExists:
			// An update needs the Id of the existing record.
			msg := fmt.Sprintf("GenericFile %s has a previous version, but its Id is missing.",
				gf.Identifier)
			recorder.Context.MessageLog.Error(msg)
			ingestState.IngestManifest.RecordResult.AddError(msg)
		case gf.Id == 0:
			filesToCreate = append(filesToCreate, gf)
		}
		// A file with no previous version but a non-zero Id is put in
		// neither batch.
	}

	// Send whatever is left over from the last, possibly partial, batch.
	flush()
}
Example #2
0
// TestSavableName checks util.HasSavableName against a list of paths it is
// expected to reject and a list it is expected to accept.
func TestSavableName(t *testing.T) {
	// Paths for which HasSavableName must return false.
	notSavable := []string{
		".",
		"..",
		"._junk.txt",
		"data/subdir/._junk.txt",
		"bagit.txt",
		"manifest-md5.txt",
		"manifest-sha256.txt",
		"tagmanifest-md5.txt",
		"tagmanifest-sha256.txt",
	}
	for _, path := range notSavable {
		assert.False(t, util.HasSavableName(path), path)
	}

	// Paths for which HasSavableName must return true. Several reuse the
	// file names rejected above, but inside a subdirectory.
	savable := []string{
		"data/stuff/bagit.txt",
		"custom_tags/manifest-md5.txt",
		"custom_tags/manifest-sha256.txt",
		"useless_tags/tagmanifest-md5.txt",
		"my_tags/tagmanifest-sha256.txt",
		"polly/wolly/doodle/all/day",
	}
	for _, path := range savable {
		assert.True(t, util.HasSavableName(path), path)
	}
}
Example #3
0
// Untar unpacks the tar file at reader.Manifest.Object.IngestTarFilePath.
// This is the only method you should be calling.
//
// Regular files whose names pass util.HasSavableName are handed to
// createAndSaveGenericFile. Other regular files are written to disk with
// saveFile and listed in IngestFilesIgnored. Entries that are neither regular
// files nor directories are only listed in IngestFilesIgnored.
//
// Nothing is returned. Errors are recorded in reader.Manifest.UntarResult,
// and processing stops at the first one. UntarResult.Finish is called exactly
// once on every return path.
func (reader *Reader) Untar() {
	reader.recordStartOfWork()
	// Deferring Finish guarantees that no early return can forget it.
	defer func() { reader.Manifest.UntarResult.Finish() }()

	if !reader.manifestInfoIsValid() {
		return
	}

	// Note the tar file's parent directory
	tarFilePath := reader.Manifest.Object.IngestTarFilePath
	tarFileDir := filepath.Dir(tarFilePath)

	// Open the tar file for reading.
	file, err := os.Open(tarFilePath)
	if err != nil {
		reader.Manifest.UntarResult.AddError(
			"Could not open file %s for untarring: %v",
			tarFilePath, err)
		return
	}
	defer file.Close()

	// Untar the file and record the results.
	reader.tarReader = tar.NewReader(file)

	for {
		header, err := reader.tarReader.Next()
		if err != nil {
			// NOTE(review): archive/tar signals the end of the archive with
			// io.EOF. Comparing against that sentinel would be sturdier than
			// matching the message text, but it needs the io package, whose
			// import cannot be confirmed from this part of the file.
			if err.Error() == "EOF" {
				break // end of archive
			}
			reader.Manifest.UntarResult.AddError(
				"Error reading tar file header: %v. "+
					"Either this is not a tar file, or the file is corrupt.",
				err)
			return
		}

		// Top-level dir will be the first header entry.
		if reader.Manifest.Object.IngestUntarredPath == "" {
			topLevelDir, dirErr := reader.getTopLevelDir(header.Name)
			if dirErr != nil {
				// "%s" keeps a '%' in the message from being read as a verb.
				reader.Manifest.UntarResult.AddError("%s", dirErr.Error())
				return
			}
			reader.Manifest.Object.IngestUntarredPath = filepath.Join(tarFileDir, topLevelDir)
		}

		// Get the output path for this file -> Where should we untar it to?
		untarRoot := reader.Manifest.Object.IngestUntarredPath
		outputPath := filepath.Join(untarRoot, header.Name)

		// header.Name comes from the archive. filepath.Join resolves ".."
		// segments, so a crafted name could point outside the untar
		// directory. Refuse such entries before touching the disk.
		if tarEntryEscapesDir(untarRoot, outputPath) {
			reader.Manifest.UntarResult.AddError(
				"Tar entry '%s' would be unpacked outside of directory '%s'",
				header.Name, untarRoot)
			return
		}

		// Make sure the directory that we're about to write into exists.
		if mkdirErr := os.MkdirAll(filepath.Dir(outputPath), 0755); mkdirErr != nil {
			reader.Manifest.UntarResult.AddError("Could not create destination file '%s' "+
				"while unpacking tar archive: %v", outputPath, mkdirErr)
			return
		}

		// Copy the file, if it's an actual file. Otherwise, ignore it and record
		// that we saw it. The bag library does not deal with items like symlinks.
		switch header.Typeflag {
		case tar.TypeReg, tar.TypeRegA:
			fileName, nameErr := getFileName(header.Name)
			if nameErr != nil {
				reader.Manifest.UntarResult.AddError("%s", nameErr.Error())
				return
			}
			if util.HasSavableName(fileName) {
				gf := reader.createAndSaveGenericFile(fileName, header)
				if gf.IngestErrorMessage != "" {
					reader.Manifest.UntarResult.AddError("%s", gf.IngestErrorMessage)
					return
				}
			} else {
				// This is probably something like bagit.txt or a manifest,
				// which we must save to disk but won't need to preserve in
				// long-term storage
				reader.Manifest.Object.IngestFilesIgnored = append(
					reader.Manifest.Object.IngestFilesIgnored, outputPath)
				if saveErr := reader.saveFile(outputPath); saveErr != nil {
					reader.Manifest.UntarResult.AddError(
						"Error copying file from tar archive to '%s': %v",
						outputPath, saveErr)
					return
				}
			}
		case tar.TypeDir:
			// Directories need no work beyond the MkdirAll above.
		default:
			// Header item is neither file nor directory.
			// Do nothing, but record that we saw this item.
			reader.Manifest.Object.IngestFilesIgnored = append(
				reader.Manifest.Object.IngestFilesIgnored,
				header.Name)
		}
	}
}

// tarEntryEscapesDir reports whether target lies outside baseDir, judged by
// the relative path from baseDir to target: it escapes when that path is ".."
// or starts with ".." followed by a path separator, or when no relative path
// can be computed at all.
func tarEntryEscapesDir(baseDir, target string) bool {
	rel, err := filepath.Rel(baseDir, target)
	if err != nil {
		return true
	}
	if rel == ".." {
		return true
	}
	// A name such as "..data" merely begins with two dots and stays inside.
	return len(rel) > 2 && rel[:2] == ".." && os.IsPathSeparator(rel[2])
}