Example #1
func CopyingPlacer(srcBasePath, destBasePath string, _ bool) integrity.Emplacement {
	srcBaseStat, err := os.Stat(srcBasePath)
	if err != nil || !srcBaseStat.IsDir() {
		panic(Error.New("copyingplacer: srcPath %q must be dir: %s", srcBasePath, err))
	}
	destBaseStat, err := os.Stat(destBasePath)
	if err != nil || !destBaseStat.IsDir() {
		panic(Error.New("copyingplacer: destPath %q must be dir: %s", destBasePath, err))
	}
	// remove any files already here (to emulate behavior like an overlapping mount)
	// (can't take the easy route and just `os.RemoveAll(destBasePath)` because that propagates time changes onto the parent.)
	d, err := os.Open(destBasePath)
	if err != nil {
		panic(Error.New("copyingplacer: io error: %s", err))
	}
	defer d.Close()
	names, err := d.Readdirnames(-1)
	if err != nil {
		panic(Error.New("copyingplacer: io error: %s", err))
	}
	for _, name := range names {
		err := os.RemoveAll(filepath.Join(destBasePath, name))
		if err != nil {
			panic(Error.New("copyingplacer: io error: %s", err))
		}
	}
	// walk and copy
	preVisit := func(filenode *fs.FilewalkNode) error {
		if filenode.Err != nil {
			return filenode.Err
		}
		hdr, file := fs.ScanFile(srcBasePath, filenode.Path, filenode.Info)
		if file != nil {
			defer file.Close()
		}
		fs.PlaceFile(destBasePath, hdr, file)
		return nil
	}
	postVisit := func(filenode *fs.FilewalkNode) error {
		if filenode.Info.IsDir() {
			if err := fspatch.UtimesNano(filepath.Join(destBasePath, filenode.Path), def.Epochwhen, filenode.Info.ModTime()); err != nil {
				return err
			}
		}
		return nil
	}
	try.Do(func() {
		if err := fs.Walk(srcBasePath, preVisit, postVisit); err != nil {
			panic(err)
		}
	}).CatchAll(func(err error) {
		panic(Error.New("copyingplacer: io failed: %s", err))
	}).Done()

	return copyEmplacement{path: destBasePath}
}
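The child-by-child removal at the top of this placer is a generally useful pattern: empty a directory without removing (and recreating) the directory itself, so the parent directory's mtime is never disturbed. A minimal, stdlib-only sketch of that pattern follows; the helper name emptyDir is hypothetical and not part of the code above.

package example

import (
	"os"
	"path/filepath"
)

// emptyDir removes every entry inside dir without removing dir itself,
// so the parent directory's mtime is left untouched.
func emptyDir(dir string) error {
	d, err := os.Open(dir)
	if err != nil {
		return err
	}
	defer d.Close()
	names, err := d.Readdirnames(-1)
	if err != nil {
		return err
	}
	for _, name := range names {
		if err := os.RemoveAll(filepath.Join(dir, name)); err != nil {
			return err
		}
	}
	return nil
}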
Example #2
/*
	Create files described by the fixtures on the real filesystem path given.
*/
func (ffs Fixture) Create(basePath string) {
	basePath, err := filepath.Abs(basePath)
	if err != nil {
		panic(errors.IOError.Wrap(err))
	}
	if err := os.MkdirAll(basePath, 0755); err != nil {
		panic(errors.IOError.Wrap(err))
	}
	for _, f := range ffs.Files {
		fs.PlaceFile(basePath, f.Metadata, bytes.NewBuffer(f.Body))
	}
	// re-do time enforcement... in reverse order, so we cover our own tracks
	for i := len(ffs.Files) - 1; i >= 0; i-- {
		f := ffs.Files[i]
		if f.Metadata.Typeflag == tar.TypeDir {
			fs.PlaceDirTime(basePath, f.Metadata)
		}
	}
}
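The reverse-order pass is a general trick: writing a child entry bumps the containing directory's mtime, so directory times have to be restored after all files are placed, children before parents. A stdlib-only sketch of the idea, with a hypothetical paths slice and mtimes map standing in for the fixture metadata used above:

package example

import (
	"os"
	"time"
)

// restoreDirTimes re-applies recorded mtimes to directories after their
// contents have been written. paths is assumed to be in creation order
// (parents before children), so walking it backwards fixes children first
// and parents last, covering the tracks left by the child writes.
func restoreDirTimes(paths []string, mtimes map[string]time.Time) error {
	for i := len(paths) - 1; i >= 0; i-- {
		p := paths[i]
		if err := os.Chtimes(p, mtimes[p], mtimes[p]); err != nil {
			return err
		}
	}
	return nil
}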
Example #3
func NewAufsPlacer(workPath string) integrity.Placer {
	err := os.MkdirAll(workPath, 0755)
	if err != nil {
		panic(errors.IOError.Wrap(err))
	}
	workPath, err = filepath.Abs(workPath)
	if err != nil {
		panic(errors.IOError.Wrap(err))
	}
	return func(srcBasePath, destBasePath string, writable bool) integrity.Emplacement {
		srcBaseStat, err := os.Stat(srcBasePath)
		if err != nil || !srcBaseStat.IsDir() {
			panic(Error.New("aufsplacer: srcPath %q must be dir: %s", srcBasePath, err))
		}
		destBaseStat, err := os.Stat(destBasePath)
		if err != nil || !destBaseStat.IsDir() {
			panic(Error.New("aufsplacer: destPath %q must be dir: %s", destBasePath, err))
		}
		// if a RO mount is requested, no need to set up COW; just hand off to bind.
		if !writable {
			return BindPlacer(srcBasePath, destBasePath, writable)
		}
		// make work dir for the overlay layer
		layerPath, err := ioutil.TempDir(workPath, "layer-")
		if err != nil {
			panic(errors.IOError.Wrap(err))
		}
		// set up COW
		// if you were doing this in a shell, it'd be roughly `mount -t aufs -o br="$layer":"$base" none "$composite"`.
		// yes, this may behave oddly in the event of paths containing ":" or "=".
		if err := syscall.Mount("none", destBasePath, "aufs", 0, fmt.Sprintf("br:%s=rw:%s=ro", layerPath, srcBasePath)); err != nil {
			panic(Error.New("aufsplacer: mount failed: %s", err))
		}
		// fix props on layerPath; otherwise they instantly leak through
		hdr, _ := fs.ScanFile(srcBasePath, "./", srcBaseStat)
		fs.PlaceFile(layerPath, hdr, nil)
		// that's it; setting up COW also mounted it into destination.
		return aufsEmplacement{
			layerPath:   layerPath,
			landingPath: destBasePath,
		}
	}
}
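For the non-writable branch, the placer hands off to a bind mount. The sketch below shows the usual Linux recipe for a read-only bind mount (bind, then remount the bind read-only); it is an illustration of the general pattern, not the project's actual BindPlacer implementation.

//go:build linux

package example

import "syscall"

// bindReadOnly binds src onto dest, then remounts that bind point
// read-only. Linux requires the second remount step because MS_RDONLY
// is ignored on the initial MS_BIND mount.
func bindReadOnly(src, dest string) error {
	if err := syscall.Mount(src, dest, "", syscall.MS_BIND, ""); err != nil {
		return err
	}
	return syscall.Mount("", dest, "", syscall.MS_BIND|syscall.MS_REMOUNT|syscall.MS_RDONLY, "")
}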
Example #4
func Extract(tr *tar.Reader, destBasePath string, bucket fshash.Bucket, hasherFactory func() hash.Hash) {
	for {
		thdr, err := tr.Next()
		if err == io.EOF {
			break // end of archive
		}
		if err != nil {
			panic(integrity.WarehouseConnectionError.New("corrupt tar: %s", err))
		}
		hdr := fs.Metadata(*thdr)
		// filter/sanitize values:
		// - names must be clean, relative, dot-slash prefixed, and dirs slash-suffixed
		// - times should never be go's zero value; replace those with epoch
		// Note that names at this point should be handled by `path` (not `filepath`); they are in the canonical form fed to hashing.
		hdr.Name = path.Clean(hdr.Name)
		if strings.HasPrefix(hdr.Name, "../") {
			panic(integrity.WarehouseConnectionError.New("corrupt tar: paths that use '../' to leave the base dir are invalid"))
		}
		if hdr.Name != "." {
			hdr.Name = "./" + hdr.Name
		}
		if hdr.ModTime.IsZero() {
			hdr.ModTime = def.Epochwhen
		}
		if hdr.AccessTime.IsZero() {
			hdr.AccessTime = def.Epochwhen
		}
		// conjure parents, if necessary.  tar format allows implicit parent dirs.
		// Note that if any of the implicitly conjured dirs is specified later, unpacking won't notice,
		// but bucket hashing iteration will (correctly) blow up for repeat entries.
		// It may well be possible to construct a tar like that, but it's already well established that
		// tars with repeated filenames are just asking for trouble and shall be rejected without
		// ceremony because they're just a ridiculous idea.
		parts := strings.Split(hdr.Name, "/")
		for i := 1; i < len(parts); i++ {
			_, err := os.Lstat(filepath.Join(append([]string{destBasePath}, parts[:i]...)...))
			// if it already exists, move along; if the error is anything interesting, let PlaceFile decide how to deal with it
			if err == nil || !os.IsNotExist(err) {
				continue
			}
			// if we're missing a dir, conjure a node with defaulted values (same as we do for "./")
			conjuredHdr := fshash.DefaultDirRecord().Metadata
			conjuredHdr.Name = strings.Join(parts[:i], "/") + "/" // path.Join does cleaning; unwanted.
			fs.PlaceFile(destBasePath, conjuredHdr, nil)
			bucket.Record(conjuredHdr, nil)
		}
		// place the file
		switch hdr.Typeflag {
		case tar.TypeReg:
			reader := &flak.HashingReader{tr, hasherFactory()}
			fs.PlaceFile(destBasePath, hdr, reader)
			bucket.Record(hdr, reader.Hasher.Sum(nil))
		case tar.TypeDir:
			hdr.Name += "/"
			fallthrough
		default:
			fs.PlaceFile(destBasePath, hdr, nil)
			bucket.Record(hdr, nil)
		}
	}
	// cleanup dir times with a post-order traversal over the bucket
	if err := treewalk.Walk(bucket.Iterator(), nil, func(node treewalk.Node) error {
		record := node.(fshash.RecordIterator).Record()
		if record.Metadata.Typeflag == tar.TypeDir {
			fs.PlaceDirTime(destBasePath, record.Metadata)
		}
		return nil
	}); err != nil {
		panic(err)
	}
}
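The name normalization at the top of the loop (clean the path, reject anything escaping the base dir, force a "./" prefix, default zero timestamps to epoch) is easy to pull out on its own. A stdlib-only sketch of those rules, with a local epoch standing in for def.Epochwhen and an extra guard for a bare ".." added for illustration:

package example

import (
	"errors"
	"path"
	"strings"
	"time"
)

var epoch = time.Unix(0, 0) // stand-in for def.Epochwhen

// normalizeEntry applies the extractor's canonicalization rules: clean the
// name, reject paths that escape the base dir, prefix "./", and replace a
// zero mtime with a fixed epoch so hashes stay stable.
func normalizeEntry(name string, mtime time.Time) (string, time.Time, error) {
	name = path.Clean(name)
	if name == ".." || strings.HasPrefix(name, "../") {
		return "", time.Time{}, errors.New("entry escapes base dir")
	}
	if name != "." {
		name = "./" + name
	}
	if mtime.IsZero() {
		mtime = epoch
	}
	return name, mtime, nil
}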
Example #5
/*
	Arenas produced by S3 Transmats may be relocated by simple `mv`.
*/
func (t *S3Transmat) Materialize(
	kind integrity.TransmatKind,
	dataHash integrity.CommitID,
	siloURIs []integrity.SiloURI,
	options ...integrity.MaterializerConfigurer,
) integrity.Arena {
	var arena dirArena
	try.Do(func() {
		// Basic validation and config
		config := integrity.EvaluateConfig(options...)
		if kind != Kind {
			panic(errors.ProgrammerError.New("This transmat supports definitions of type %q, not %q", Kind, kind))
		}

		// Parse URI; Find warehouses.
		if len(siloURIs) < 1 {
			panic(integrity.ConfigError.New("Materialization requires at least one data source!"))
			// Note that it's possible a caching layer will satisfy things even without data sources...
			//  but if that was going to happen, it already would have by now.
		}
		// Our policy is to take the first path that exists.
		//  This lets you specify a series of potential locations, and if one is unavailable we'll just take the next.
		var warehouseBucketName string
		var warehousePathPrefix string
		var warehouseCtntAddr bool
		for _, givenURI := range siloURIs {
			u, err := url.Parse(string(givenURI))
			if err != nil {
				panic(integrity.ConfigError.New("failed to parse URI: %s", err))
			}
			warehouseBucketName = u.Host
			warehousePathPrefix = u.Path
			switch u.Scheme {
			case "s3":
				warehouseCtntAddr = false
			case "s3+splay":
				warehouseCtntAddr = true
			default:
				panic(integrity.ConfigError.New("unrecognized scheme: %q", u.Scheme))
			}
			// TODO figure out how to check for data (or at least warehouse!) presence;
			//  currently just assuming the first one's golden, and blowing up later if it's not.
			break
		}
		if warehouseBucketName == "" {
			panic(integrity.WarehouseConnectionError.New("No warehouses were available!"))
		}

		// load keys from env
		// TODO someday URIs should grow smart enough to control this in a more general fashion -- but for now, host ENV is actually pretty feasible and plays easily with others.
		// TODO should not require keys!  we're just reading, after all; anon access is 100% valid.
		//   Buuuuut s3gof3r doesn't seem to understand empty keys; it still sends them as if to login, and AWS says 403.  So, foo.
		keys, err := s3gof3r.EnvKeys()
		if err != nil {
			panic(S3CredentialsMissingError.Wrap(err))
		}

		// initialize reader from s3!
		getPath := warehousePathPrefix
		if warehouseCtntAddr {
			getPath = path.Join(warehousePathPrefix, string(dataHash))
		}
		s3reader := makeS3reader(warehouseBucketName, getPath, keys)
		defer s3reader.Close()
		// prepare decompression as necessary
		reader, err := tartrans.Decompress(s3reader)
		if err != nil {
			panic(integrity.WarehouseConnectionError.New("could not start decompressing: %s", err))
		}
		tarReader := tar.NewReader(reader)

		// Create staging arena to produce data into.
		arena.path, err = ioutil.TempDir(t.workPath, "")
		if err != nil {
			panic(integrity.TransmatError.New("Unable to create arena: %s", err))
		}

		// walk input tar stream, placing data and accumulating hashes and metadata for integrity check
		bucket := &fshash.MemoryBucket{}
		tartrans.Extract(tarReader, arena.Path(), bucket, hasherFactory)

		// bucket processing may have created a root node if missing.  if so, we need to apply its props.
		fs.PlaceFile(arena.Path(), bucket.Root().Metadata, nil)

		// hash whole tree
		actualTreeHash := fshash.Hash(bucket, hasherFactory)

		// verify total integrity
		expectedTreeHash, err := base64.URLEncoding.DecodeString(string(dataHash))
		if err != nil {
			panic(integrity.ConfigError.New("Could not parse hash: %s", err))
		}
		if bytes.Equal(actualTreeHash, expectedTreeHash) {
			// excellent, got what we asked for.
			arena.hash = dataHash
		} else {
			// this may or may not be grounds for panic, depending on configuration.
			if config.AcceptHashMismatch {
				// if we're tolerating mismatches, report the actual hash through different mechanisms.
				// you probably only ever want to use this in tests or debugging; in prod it's just asking for insanity.
				arena.hash = integrity.CommitID(actualTreeHash)
			} else {
				panic(integrity.NewHashMismatchError(string(dataHash), base64.URLEncoding.EncodeToString(actualTreeHash)))
			}
		}
	}).Catch(integrity.Error, func(err *errors.Error) {
		panic(err)
	}).CatchAll(func(err error) {
		panic(integrity.UnknownError.Wrap(err))
	}).Done()
	return arena
}
Example #6
/*
	Arenas produced by Tar Transmats may be relocated by simple `mv`.
*/
func (t *TarTransmat) Materialize(
	kind integrity.TransmatKind,
	dataHash integrity.CommitID,
	siloURIs []integrity.SiloURI,
	options ...integrity.MaterializerConfigurer,
) integrity.Arena {
	var arena tarArena
	try.Do(func() {
		// Basic validation and config
		config := integrity.EvaluateConfig(options...)
		if kind != Kind {
			panic(errors.ProgrammerError.New("This transmat supports definitions of type %q, not %q", Kind, kind))
		}

		// Ping silos
		if len(siloURIs) < 1 {
			panic(integrity.ConfigError.New("Materialization requires at least one data source!"))
			// Note that it's possible a caching layer will satisfy things even without data sources...
			//  but if that was going to happen, it already would have by now.
		}
		// Our policy is to take the first path that exists.
		//  This lets you specify a series of potential locations, and if one is unavailable we'll just take the next.
		var stream io.Reader
		for _, uri := range siloURIs {
			try.Do(func() {
				stream = makeReader(dataHash, uri)
			}).Catch(integrity.DataDNE, func(err *errors.Error) {
				// fine, we'll just try the next one
				// TODO LOGGING
			}).Catch(integrity.WarehouseConnectionError, func(err *errors.Error) {
				// ... this does kind of seem to indicate we should have "warehouse offline or DNE" be separate from "tcp flaked after we shook on it yo"
				// for now we consider both fatal.  revisit this when we get smarter logging, etc.
				panic(err)
			}).Done()
			if stream != nil {
				break
			}
		}
		if stream == nil {
			panic(integrity.WarehouseConnectionError.New("No warehouses were available!"))
		}

		// Wrap input stream with decompression as necessary
		reader, err := Decompress(stream)
		if err != nil {
			panic(integrity.WarehouseConnectionError.New("could not start decompressing: %s", err))
		}
		tarReader := tar.NewReader(reader)

		// Create staging arena to produce data into.
		arena.path, err = ioutil.TempDir(t.workPath, "")
		if err != nil {
			panic(integrity.TransmatError.New("Unable to create arena: %s", err))
		}

		// walk input tar stream, placing data and accumulating hashes and metadata for integrity check
		bucket := &fshash.MemoryBucket{}
		Extract(tarReader, arena.Path(), bucket, hasherFactory)

		// bucket processing may have created a root node if missing.  if so, we need to apply its props.
		fs.PlaceFile(arena.Path(), bucket.Root().Metadata, nil)

		// hash whole tree
		actualTreeHash := fshash.Hash(bucket, hasherFactory)

		// verify total integrity
		expectedTreeHash, err := base64.URLEncoding.DecodeString(string(dataHash))
		if err != nil {
			panic(integrity.ConfigError.New("Could not parse hash: %s", err))
		}
		if bytes.Equal(actualTreeHash, expectedTreeHash) {
			// excellent, got what we asked for.
			arena.hash = dataHash
		} else {
			// this may or may not be grounds for panic, depending on configuration.
			if config.AcceptHashMismatch {
				// if we're tolerating mismatches, report the actual hash through different mechanisms.
				// you probably only ever want to use this in tests or debugging; in prod it's just asking for insanity.
				arena.hash = integrity.CommitID(actualTreeHash)
			} else {
				panic(integrity.NewHashMismatchError(string(dataHash), base64.URLEncoding.EncodeToString(actualTreeHash)))
			}
		}
	}).Catch(integrity.Error, func(err *errors.Error) {
		panic(err)
	}).CatchAll(func(err error) {
		panic(integrity.UnknownError.Wrap(err))
	}).Done()
	return arena
}
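The silo selection loop above implements a first-available policy: try each URI in order, treat "data not there" as a soft failure and move on, and treat a connection failure as fatal. A generic, stdlib-only sketch of that policy (the fetch callback and the errDataDNE sentinel are hypothetical stand-ins for makeReader and integrity.DataDNE):

package example

import (
	"errors"
	"fmt"
	"io"
)

// errDataDNE is a hypothetical sentinel meaning "this source is reachable,
// but it doesn't have the requested data" -- the only error worth skipping.
var errDataDNE = errors.New("data does not exist in this silo")

// firstAvailable tries each source in order and returns the first stream
// that opens. Missing data moves on to the next source; any other error
// (e.g. a connection failure) is treated as fatal, mirroring the loop above.
func firstAvailable(uris []string, fetch func(uri string) (io.Reader, error)) (io.Reader, error) {
	for _, uri := range uris {
		stream, err := fetch(uri)
		if err == nil {
			return stream, nil
		}
		if errors.Is(err, errDataDNE) {
			continue // fine, we'll just try the next one
		}
		return nil, err
	}
	return nil, fmt.Errorf("no warehouses were available")
}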