func (recorder *APTRecorder) saveGenericFiles(ingestState *models.IngestState) { filesToCreate := make([]*models.GenericFile, 0) filesToUpdate := make([]*models.GenericFile, 0) for i, gf := range ingestState.IngestManifest.Object.GenericFiles { // We run this check here, rather than in the validator, // because this is an APTrust-specific policy. if !util.HasSavableName(gf.OriginalPath()) { recorder.Context.MessageLog.Info("Will not save %s: does not match savable name pattern.", gf.Identifier) gf.IngestNeedsSave = false } if i%GENERIC_FILE_BATCH_SIZE == 0 { recorder.createGenericFiles(ingestState, filesToCreate) if ingestState.IngestManifest.RecordResult.HasErrors() { break } recorder.updateGenericFiles(ingestState, filesToUpdate) if ingestState.IngestManifest.RecordResult.HasErrors() { break } filesToCreate = make([]*models.GenericFile, 0) filesToUpdate = make([]*models.GenericFile, 0) } if gf.IngestNeedsSave { if gf.IngestPreviousVersionExists { if gf.Id > 0 { filesToUpdate = append(filesToUpdate, gf) } else { msg := fmt.Sprintf("GenericFile %s has a previous version, but its Id is missing.", gf.Identifier) recorder.Context.MessageLog.Error(msg) ingestState.IngestManifest.RecordResult.AddError(msg) } } else if gf.IngestNeedsSave && gf.Id == 0 { filesToCreate = append(filesToCreate, gf) } } } if !ingestState.IngestManifest.RecordResult.HasErrors() { recorder.createGenericFiles(ingestState, filesToCreate) recorder.updateGenericFiles(ingestState, filesToUpdate) } }
func TestSavableName(t *testing.T) { assert.False(t, util.HasSavableName(".")) assert.False(t, util.HasSavableName("..")) assert.False(t, util.HasSavableName("._junk.txt")) assert.False(t, util.HasSavableName("data/subdir/._junk.txt")) assert.False(t, util.HasSavableName("bagit.txt")) assert.False(t, util.HasSavableName("manifest-md5.txt")) assert.False(t, util.HasSavableName("manifest-sha256.txt")) assert.False(t, util.HasSavableName("tagmanifest-md5.txt")) assert.False(t, util.HasSavableName("tagmanifest-sha256.txt")) assert.True(t, util.HasSavableName("data/stuff/bagit.txt")) assert.True(t, util.HasSavableName("custom_tags/manifest-md5.txt")) assert.True(t, util.HasSavableName("custom_tags/manifest-sha256.txt")) assert.True(t, util.HasSavableName("useless_tags/tagmanifest-md5.txt")) assert.True(t, util.HasSavableName("my_tags/tagmanifest-sha256.txt")) assert.True(t, util.HasSavableName("polly/wolly/doodle/all/day")) }
// This is the only method you should be calling. func (reader *Reader) Untar() { reader.recordStartOfWork() if !reader.manifestInfoIsValid() { reader.Manifest.UntarResult.Finish() return } // Note the tar file's parent directory tarFileDir := filepath.Dir(reader.Manifest.Object.IngestTarFilePath) // Open the tar file for reading. file, err := os.Open(reader.Manifest.Object.IngestTarFilePath) if file != nil { defer file.Close() } if err != nil { reader.Manifest.UntarResult.AddError( "Could not open file %s for untarring: %v", reader.Manifest.Object.IngestTarFilePath, err) reader.Manifest.UntarResult.Finish() return } // Untar the file and record the results. reader.tarReader = tar.NewReader(file) for { header, err := reader.tarReader.Next() if err != nil && err.Error() == "EOF" { break // end of archive } if err != nil { reader.Manifest.UntarResult.AddError( "Error reading tar file header: %v. "+ "Either this is not a tar file, or the file is corrupt.", err) reader.Manifest.UntarResult.Finish() return } // Top-level dir will be the first header entry. if reader.Manifest.Object.IngestUntarredPath == "" { topLevelDir, err := reader.getTopLevelDir(header.Name) if err != nil { reader.Manifest.UntarResult.AddError(err.Error()) reader.Manifest.UntarResult.Finish() return } reader.Manifest.Object.IngestUntarredPath = filepath.Join(tarFileDir, topLevelDir) } // Get the output path for this file -> Where should we untar it to? outputPath := filepath.Join(reader.Manifest.Object.IngestUntarredPath, header.Name) // Make sure the directory that we're about to write into exists. err = os.MkdirAll(filepath.Dir(outputPath), 0755) if err != nil { reader.Manifest.UntarResult.AddError("Could not create destination file '%s' "+ "while unpacking tar archive: %v", outputPath, err) return } // Copy the file, if it's an actual file. Otherwise, ignore it and record // a warning. The bag library does not deal with items like symlinks. if header.Typeflag == tar.TypeReg || header.Typeflag == tar.TypeRegA { fileName, err := getFileName(header.Name) if err != nil { reader.Manifest.UntarResult.AddError(err.Error()) reader.Manifest.UntarResult.Finish() return } if util.HasSavableName(fileName) { gf := reader.createAndSaveGenericFile(fileName, header) if gf.IngestErrorMessage != "" { reader.Manifest.UntarResult.AddError(gf.IngestErrorMessage) reader.Manifest.UntarResult.Finish() return } } else { // This is probably something like bagit.txt or a manifest, // which we must save to disk but won't need to preserve in // long-term storage reader.Manifest.Object.IngestFilesIgnored = append( reader.Manifest.Object.IngestFilesIgnored, outputPath) err = reader.saveFile(outputPath) if err != nil { reader.Manifest.UntarResult.AddError( "Error copying file from tar archive to '%s': %v", outputPath, err) reader.Manifest.UntarResult.Finish() return } } } else if header.Typeflag != tar.TypeDir { // Header item is neither file nor directory. // Do nothing, but record that we saw this item. reader.Manifest.Object.IngestFilesIgnored = append( reader.Manifest.Object.IngestFilesIgnored, header.Name) } } reader.Manifest.UntarResult.Finish() }