func CopyingPlacer(srcBasePath, destBasePath string, _ bool) integrity.Emplacement {
	srcBaseStat, err := os.Stat(srcBasePath)
	if err != nil || !srcBaseStat.IsDir() {
		panic(Error.New("copyingplacer: srcPath %q must be dir: %s", srcBasePath, err))
	}
	destBaseStat, err := os.Stat(destBasePath)
	if err != nil || !destBaseStat.IsDir() {
		panic(Error.New("copyingplacer: destPath %q must be dir: %s", destBasePath, err))
	}
	// remove any files already here (to emulate behavior like an overlapping mount)
	// (can't take the easy route and just `os.RemoveAll(destBasePath)` because that propagates time changes onto the parent.)
	d, err := os.Open(destBasePath)
	if err != nil {
		panic(Error.New("copyingplacer: io error: %s", err))
	}
	names, err := d.Readdirnames(-1)
	if err != nil {
		panic(Error.New("copyingplacer: io error: %s", err))
	}
	for _, name := range names {
		err := os.RemoveAll(filepath.Join(destBasePath, name))
		if err != nil {
			panic(Error.New("copyingplacer: io error: %s", err))
		}
	}
	// walk and copy
	preVisit := func(filenode *fs.FilewalkNode) error {
		if filenode.Err != nil {
			return filenode.Err
		}
		hdr, file := fs.ScanFile(srcBasePath, filenode.Path, filenode.Info)
		if file != nil {
			defer file.Close()
		}
		fs.PlaceFile(destBasePath, hdr, file)
		return nil
	}
	postVisit := func(filenode *fs.FilewalkNode) error {
		if filenode.Info.IsDir() {
			if err := fspatch.UtimesNano(filepath.Join(destBasePath, filenode.Path), def.Epochwhen, filenode.Info.ModTime()); err != nil {
				return err
			}
		}
		return nil
	}
	try.Do(func() {
		if err := fs.Walk(srcBasePath, preVisit, postVisit); err != nil {
			panic(err)
		}
	}).CatchAll(func(err error) {
		panic(Error.New("copyingplacer: io failed: %s", err))
	}).Done()
	return copyEmplacement{path: destBasePath}
}
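// Illustrative usage sketch (not from the original source): emplacing one directory's
// contents into another via CopyingPlacer.  The paths are placeholders, and the call to
// Teardown assumes the integrity.Emplacement interface exposes such a cleanup method --
// an assumption made only for this sketch, not something shown above.
func exampleCopyingPlacer() {
	emplacement := CopyingPlacer("/srv/src-data", "/srv/job-workspace/input", false)
	// ... run whatever needs the data in place ...
	emplacement.Teardown() // hypothetical cleanup; see assumption above
}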
/*
	Create files described by the fixtures on the real filesystem path given.
*/
func (ffs Fixture) Create(basePath string) {
	basePath, err := filepath.Abs(basePath)
	if err != nil {
		panic(errors.IOError.Wrap(err))
	}
	if err := os.MkdirAll(basePath, 0755); err != nil {
		panic(errors.IOError.Wrap(err))
	}
	for _, f := range ffs.Files {
		fs.PlaceFile(basePath, f.Metadata, bytes.NewBuffer(f.Body))
	}
	// re-do time enforcement... in reverse order, so we cover our own tracks
	for i := len(ffs.Files) - 1; i >= 0; i-- {
		f := ffs.Files[i]
		if f.Metadata.Typeflag == tar.TypeDir {
			fs.PlaceDirTime(basePath, f.Metadata)
		}
	}
}
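// Illustrative usage sketch (not from the original source): materializing a fixture into a
// temp dir, as a test might.  The `ffs` parameter stands in for whatever Fixture value the
// test suite defines; only the Create(basePath) signature above is relied upon.
func exampleFixtureCreate(ffs Fixture) {
	tmpDir, err := ioutil.TempDir("", "fixture-")
	if err != nil {
		panic(err)
	}
	defer os.RemoveAll(tmpDir)
	ffs.Create(tmpDir) // places every described file, then re-applies dir times in reverse order
}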
func NewAufsPlacer(workPath string) integrity.Placer {
	err := os.MkdirAll(workPath, 0755)
	if err != nil {
		panic(errors.IOError.Wrap(err))
	}
	workPath, err = filepath.Abs(workPath)
	if err != nil {
		panic(errors.IOError.Wrap(err))
	}
	return func(srcBasePath, destBasePath string, writable bool) integrity.Emplacement {
		srcBaseStat, err := os.Stat(srcBasePath)
		if err != nil || !srcBaseStat.IsDir() {
			panic(Error.New("aufsplacer: srcPath %q must be dir: %s", srcBasePath, err))
		}
		destBaseStat, err := os.Stat(destBasePath)
		if err != nil || !destBaseStat.IsDir() {
			panic(Error.New("aufsplacer: destPath %q must be dir: %s", destBasePath, err))
		}
		// if a RO mount is requested, no need to set up COW; just hand off to bind.
		if !writable {
			return BindPlacer(srcBasePath, destBasePath, writable)
		}
		// make work dir for the overlay layer
		layerPath, err := ioutil.TempDir(workPath, "layer-")
		if err != nil {
			panic(errors.IOError.Wrap(err))
		}
		// set up COW
		// if you were doing this in a shell, it'd be roughly `mount -t aufs -o br="$layer":"$base" none "$composite"`.
		// yes, this may behave oddly in the event of paths containing ":" or "=".
		if err := syscall.Mount("none", destBasePath, "aufs", 0, fmt.Sprintf("br:%s=rw:%s=ro", layerPath, srcBasePath)); err != nil {
			panic(Error.New("aufsplacer: mount failed: %s", err))
		}
		// fix props on layerPath; otherwise they instantly leak through
		hdr, _ := fs.ScanFile(srcBasePath, "./", srcBaseStat)
		fs.PlaceFile(layerPath, hdr, nil)
		// that's it; setting up COW also mounted it into destination.
		return aufsEmplacement{
			layerPath:   layerPath,
			landingPath: destBasePath,
		}
	}
}
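// Illustrative usage sketch (not from the original source): building an aufs-backed placer
// and requesting a writable COW emplacement.  Paths are placeholders; as in the earlier
// sketch, the Teardown call assumes integrity.Emplacement has a cleanup method, which is an
// assumption of this sketch only.  Note the mount requires root and an aufs-capable kernel.
func exampleAufsPlacer() {
	placer := NewAufsPlacer("/tmp/aufs-work")
	emplacement := placer("/srv/src-data", "/srv/job-workspace/input", true)
	// writes land in the layer dir under /tmp/aufs-work; the source stays pristine.
	emplacement.Teardown() // hypothetical cleanup; see assumption above
}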
func Extract(tr *tar.Reader, destBasePath string, bucket fshash.Bucket, hasherFactory func() hash.Hash) {
	for {
		thdr, err := tr.Next()
		if err == io.EOF {
			break // end of archive
		}
		if err != nil {
			panic(integrity.WarehouseConnectionError.New("corrupt tar: %s", err))
		}
		hdr := fs.Metadata(*thdr)
		// filter/sanify values:
		// - names must be clean, relative dot-slash prefixed, and dirs slash-suffixed
		// - times should never be go's zero value; replace those with epoch
		// Note that names at this point should be handled by `path` (not `filepath`; these are canonical form for feed to hashing)
		hdr.Name = path.Clean(hdr.Name)
		if strings.HasPrefix(hdr.Name, "../") {
			panic(integrity.WarehouseConnectionError.New("corrupt tar: paths that use '../' to leave the base dir are invalid"))
		}
		if hdr.Name != "." {
			hdr.Name = "./" + hdr.Name
		}
		if hdr.ModTime.IsZero() {
			hdr.ModTime = def.Epochwhen
		}
		if hdr.AccessTime.IsZero() {
			hdr.AccessTime = def.Epochwhen
		}
		// conjure parents, if necessary.  tar format allows implicit parent dirs.
		// Note that if any of the implicitly conjured dirs is specified later, unpacking won't notice,
		// but bucket hashing iteration will (correctly) blow up for repeat entries.
		// It may well be possible to construct a tar like that, but it's already well established that
		// tars with repeated filenames are just asking for trouble and shall be rejected without
		// ceremony because they're just a ridiculous idea.
		parts := strings.Split(hdr.Name, "/")
		for i := range parts[:len(parts)-1] {
			i++
			_, err := os.Lstat(filepath.Join(append([]string{destBasePath}, parts[:i]...)...))
			// if it already exists, move along; if the error is anything interesting, let PlaceFile decide how to deal with it
			if err == nil || !os.IsNotExist(err) {
				continue
			}
			// if we're missing a dir, conjure a node with defaulted values (same as we do for "./")
			conjuredHdr := fshash.DefaultDirRecord().Metadata
			conjuredHdr.Name = strings.Join(parts[:i], "/") + "/" // path.Join does cleaning; unwanted.
			fs.PlaceFile(destBasePath, conjuredHdr, nil)
			bucket.Record(conjuredHdr, nil)
		}
		// place the file
		switch hdr.Typeflag {
		case tar.TypeReg:
			reader := &flak.HashingReader{tr, hasherFactory()}
			fs.PlaceFile(destBasePath, hdr, reader)
			bucket.Record(hdr, reader.Hasher.Sum(nil))
		case tar.TypeDir:
			hdr.Name += "/"
			fallthrough
		default:
			fs.PlaceFile(destBasePath, hdr, nil)
			bucket.Record(hdr, nil)
		}
	}
	// cleanup dir times with a post-order traversal over the bucket
	if err := treewalk.Walk(bucket.Iterator(), nil, func(node treewalk.Node) error {
		record := node.(fshash.RecordIterator).Record()
		if record.Metadata.Typeflag == tar.TypeDir {
			fs.PlaceDirTime(destBasePath, record.Metadata)
		}
		return nil
	}); err != nil {
		panic(err)
	}
}
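// Illustrative usage sketch (not from the original source): unpacking a tar stream and
// computing its tree hash, mirroring the Materialize flow shown elsewhere in this section.
// The choice of sha512.New384 as the hasher factory is an assumption of this sketch;
// MemoryBucket, Root, and Hash are used exactly as the Materialize code below uses them.
func exampleExtract(tarStream io.Reader, destPath string) []byte {
	bucket := &fshash.MemoryBucket{}
	hasherFactory := sha512.New384 // assumed hash; swap for whatever the project standardizes on
	Extract(tar.NewReader(tarStream), destPath, bucket, hasherFactory)
	// extraction may have conjured a root node; apply its props before hashing.
	fs.PlaceFile(destPath, bucket.Root().Metadata, nil)
	return fshash.Hash(bucket, hasherFactory)
}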
/*
	Arenas produced by S3 Transmats may be relocated by simple `mv`.
*/
func (t *S3Transmat) Materialize(
	kind integrity.TransmatKind,
	dataHash integrity.CommitID,
	siloURIs []integrity.SiloURI,
	options ...integrity.MaterializerConfigurer,
) integrity.Arena {
	var arena dirArena
	try.Do(func() {
		// Basic validation and config
		config := integrity.EvaluateConfig(options...)
		if kind != Kind {
			panic(errors.ProgrammerError.New("This transmat supports definitions of type %q, not %q", Kind, kind))
		}

		// Parse URI; Find warehouses.
		if len(siloURIs) < 1 {
			panic(integrity.ConfigError.New("Materialization requires at least one data source!"))
			// Note that it's possible a caching layer will satisfy things even without data sources...
			// but if that was going to happen, it already would have by now.
		}
		// Our policy is to take the first path that exists.
		//  This lets you specify a series of potential locations, and if one is unavailable we'll just take the next.
		var warehouseBucketName string
		var warehousePathPrefix string
		var warehouseCtntAddr bool
		for _, givenURI := range siloURIs {
			u, err := url.Parse(string(givenURI))
			if err != nil {
				panic(integrity.ConfigError.New("failed to parse URI: %s", err))
			}
			warehouseBucketName = u.Host
			warehousePathPrefix = u.Path
			switch u.Scheme {
			case "s3":
				warehouseCtntAddr = false
			case "s3+splay":
				warehouseCtntAddr = true
			default:
				panic(integrity.ConfigError.New("unrecognized scheme: %q", u.Scheme))
			}
			// TODO figure out how to check for data (or at least warehouse!) presence;
			//  currently just assuming the first one's golden, and blowing up later if it's not.
			break
		}
		if warehouseBucketName == "" {
			panic(integrity.WarehouseConnectionError.New("No warehouses were available!"))
		}

		// load keys from env
		// TODO someday URIs should grow smart enough to control this in a more general fashion -- but for now, host ENV is actually pretty feasible and plays easily with others.
		// TODO should not require keys!  we're just reading, after all; anon access is 100% valid.
		//   Buuuuut s3gof3r doesn't seem to understand empty keys; it still sends them as if to login, and AWS says 403.  So, foo.
		keys, err := s3gof3r.EnvKeys()
		if err != nil {
			panic(S3CredentialsMissingError.Wrap(err))
		}

		// initialize reader from s3!
		getPath := warehousePathPrefix
		if warehouseCtntAddr {
			getPath = path.Join(warehousePathPrefix, string(dataHash))
		}
		s3reader := makeS3reader(warehouseBucketName, getPath, keys)
		defer s3reader.Close()
		// prepare decompression as necessary
		reader, err := tartrans.Decompress(s3reader)
		if err != nil {
			panic(integrity.WarehouseConnectionError.New("could not start decompressing: %s", err))
		}
		tarReader := tar.NewReader(reader)

		// Create staging arena to produce data into.
		arena.path, err = ioutil.TempDir(t.workPath, "")
		if err != nil {
			panic(integrity.TransmatError.New("Unable to create arena: %s", err))
		}

		// walk input tar stream, placing data and accumulating hashes and metadata for integrity check
		bucket := &fshash.MemoryBucket{}
		tartrans.Extract(tarReader, arena.Path(), bucket, hasherFactory)

		// bucket processing may have created a root node if missing.  if so, we need to apply its props.
		fs.PlaceFile(arena.Path(), bucket.Root().Metadata, nil)

		// hash whole tree
		actualTreeHash := fshash.Hash(bucket, hasherFactory)

		// verify total integrity
		expectedTreeHash, err := base64.URLEncoding.DecodeString(string(dataHash))
		if err != nil {
			panic(integrity.ConfigError.New("Could not parse hash: %s", err))
		}
		if bytes.Equal(actualTreeHash, expectedTreeHash) {
			// excellent, got what we asked for.
			arena.hash = dataHash
		} else {
			// this may or may not be grounds for panic, depending on configuration.
			if config.AcceptHashMismatch {
				// if we're tolerating mismatches, report the actual hash through different mechanisms.
				// you probably only ever want to use this in tests or debugging; in prod it's just asking for insanity.
				arena.hash = integrity.CommitID(actualTreeHash)
			} else {
				panic(integrity.NewHashMismatchError(string(dataHash), base64.URLEncoding.EncodeToString(actualTreeHash)))
			}
		}
	}).Catch(integrity.Error, func(err *errors.Error) {
		panic(err)
	}).CatchAll(func(err error) {
		panic(integrity.UnknownError.Wrap(err))
	}).Done()
	return arena
}
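// Illustrative call sketch (not from the original source): materializing from an S3
// warehouse.  The bucket, prefix, and hash are placeholders; the "s3+splay" scheme
// (content-addressed layout) and the need for AWS keys in the host env are exactly as
// handled by the code above.
func exampleS3Materialize(t *S3Transmat) integrity.Arena {
	return t.Materialize(
		Kind, // must match this transmat's kind, per the validation above
		integrity.CommitID("uJRF...placeholder..."),
		[]integrity.SiloURI{
			integrity.SiloURI("s3+splay://my-bucket/warehouse/"),
		},
	)
}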
/*
	Arenas produced by Tar Transmats may be relocated by simple `mv`.
*/
func (t *TarTransmat) Materialize(
	kind integrity.TransmatKind,
	dataHash integrity.CommitID,
	siloURIs []integrity.SiloURI,
	options ...integrity.MaterializerConfigurer,
) integrity.Arena {
	var arena tarArena
	try.Do(func() {
		// Basic validation and config
		config := integrity.EvaluateConfig(options...)
		if kind != Kind {
			panic(errors.ProgrammerError.New("This transmat supports definitions of type %q, not %q", Kind, kind))
		}

		// Ping silos
		if len(siloURIs) < 1 {
			panic(integrity.ConfigError.New("Materialization requires at least one data source!"))
			// Note that it's possible a caching layer will satisfy things even without data sources...
			// but if that was going to happen, it already would have by now.
		}
		// Our policy is to take the first path that exists.
		//  This lets you specify a series of potential locations, and if one is unavailable we'll just take the next.
		var stream io.Reader
		for _, uri := range siloURIs {
			try.Do(func() {
				stream = makeReader(dataHash, uri)
			}).Catch(integrity.DataDNE, func(err *errors.Error) {
				// fine, we'll just try the next one
				// TODO LOGGING
			}).Catch(integrity.WarehouseConnectionError, func(err *errors.Error) {
				// ... this does kind of seem to indicate we should have "warehouse offline or DNE" be separate from "tcp flaked after we shook on it yo"
				// for now we consider both fatal.  revisit this when we get smarter logging, etc.
				panic(err)
			}).Done()
			if stream != nil {
				break
			}
		}
		if stream == nil {
			panic(integrity.WarehouseConnectionError.New("No warehouses were available!"))
		}

		// Wrap input stream with decompression as necessary
		reader, err := Decompress(stream)
		if err != nil {
			panic(integrity.WarehouseConnectionError.New("could not start decompressing: %s", err))
		}
		tarReader := tar.NewReader(reader)

		// Create staging arena to produce data into.
		arena.path, err = ioutil.TempDir(t.workPath, "")
		if err != nil {
			panic(integrity.TransmatError.New("Unable to create arena: %s", err))
		}

		// walk input tar stream, placing data and accumulating hashes and metadata for integrity check
		bucket := &fshash.MemoryBucket{}
		Extract(tarReader, arena.Path(), bucket, hasherFactory)

		// bucket processing may have created a root node if missing.  if so, we need to apply its props.
		fs.PlaceFile(arena.Path(), bucket.Root().Metadata, nil)

		// hash whole tree
		actualTreeHash := fshash.Hash(bucket, hasherFactory)

		// verify total integrity
		expectedTreeHash, err := base64.URLEncoding.DecodeString(string(dataHash))
		if err != nil {
			panic(integrity.ConfigError.New("Could not parse hash: %s", err))
		}
		if bytes.Equal(actualTreeHash, expectedTreeHash) {
			// excellent, got what we asked for.
			arena.hash = dataHash
		} else {
			// this may or may not be grounds for panic, depending on configuration.
			if config.AcceptHashMismatch {
				// if we're tolerating mismatches, report the actual hash through different mechanisms.
				// you probably only ever want to use this in tests or debugging; in prod it's just asking for insanity.
				arena.hash = integrity.CommitID(actualTreeHash)
			} else {
				panic(integrity.NewHashMismatchError(string(dataHash), base64.URLEncoding.EncodeToString(actualTreeHash)))
			}
		}
	}).Catch(integrity.Error, func(err *errors.Error) {
		panic(err)
	}).CatchAll(func(err error) {
		panic(integrity.UnknownError.Wrap(err))
	}).Done()
	return arena
}
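// Illustrative call sketch (not from the original source): materializing a tar-kind commit,
// listing more than one silo so the "take the first warehouse that works" fallback above has
// something to fall back to.  The URIs and hash are placeholders; which URI schemes the tar
// transmat actually accepts is determined by makeReader and is not assumed here.
func exampleTarMaterialize(t *TarTransmat) integrity.Arena {
	return t.Materialize(
		Kind, // must match this transmat's kind, per the validation above
		integrity.CommitID("uJRF...placeholder..."),
		[]integrity.SiloURI{
			integrity.SiloURI("primary-warehouse-uri-here"),   // placeholder
			integrity.SiloURI("secondary-warehouse-uri-here"), // placeholder
		},
	)
}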