// Runs a function with a tempdir, cleaning up afterward.
func WithDir(f func(string), dirs ...string) {
	if len(dirs) < 1 {
		panic(errors.ProgrammerError.New("Must have at least one sub-folder for tempdir"))
	}
	tempPath := filepath.Join(dirs...)
	// Tempdir wants parent path to exist
	err := os.MkdirAll(tempPath, 0755)
	if err != nil {
		panic(errors.IOError.Wrap(err))
	}
	try.Do(func() {
		f(tempPath)
	}).Finally(func() {
		err := os.RemoveAll(tempPath)
		if err != nil {
			// TODO: we don't want to panic here, more like a debug log entry, "failed to remove tempdir."
			//  Can accomplish once we add logging.
			panic(errors.IOError.Wrap(err))
		}
	}).Done()
}
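// ExampleWithDir is a hedged usage sketch: the parent dir choice and the file
// written here are hypothetical; only WithDir itself comes from the code above.
func ExampleWithDir() {
	WithDir(func(tmp string) {
		// `tmp` exists for the duration of this function and is removed afterward.
		scratch := filepath.Join(tmp, "scratch.txt")
		if err := ioutil.WriteFile(scratch, []byte("hello"), 0644); err != nil {
			panic(err)
		}
	}, os.TempDir(), "repeatr-demo")
}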
func CopyingPlacer(srcBasePath, destBasePath string, _ bool) integrity.Emplacement {
	srcBaseStat, err := os.Stat(srcBasePath)
	if err != nil || !srcBaseStat.IsDir() {
		panic(Error.New("copyingplacer: srcPath %q must be dir: %s", srcBasePath, err))
	}
	destBaseStat, err := os.Stat(destBasePath)
	if err != nil || !destBaseStat.IsDir() {
		panic(Error.New("copyingplacer: destPath %q must be dir: %s", destBasePath, err))
	}
	// remove any files already here (to emulate behavior like an overlapping mount)
	// (can't take the easy route and just `os.RemoveAll(destBasePath)` because that propagates time changes onto the parent.)
	d, err := os.Open(destBasePath)
	if err != nil {
		panic(Error.New("copyingplacer: io error: %s", err))
	}
	names, err := d.Readdirnames(-1)
	if err != nil {
		panic(Error.New("copyingplacer: io error: %s", err))
	}
	for _, name := range names {
		err := os.RemoveAll(filepath.Join(destBasePath, name))
		if err != nil {
			panic(Error.New("copyingplacer: io error: %s", err))
		}
	}
	// walk and copy
	preVisit := func(filenode *fs.FilewalkNode) error {
		if filenode.Err != nil {
			return filenode.Err
		}
		hdr, file := fs.ScanFile(srcBasePath, filenode.Path, filenode.Info)
		if file != nil {
			defer file.Close()
		}
		fs.PlaceFile(destBasePath, hdr, file)
		return nil
	}
	postVisit := func(filenode *fs.FilewalkNode) error {
		if filenode.Info.IsDir() {
			if err := fspatch.UtimesNano(filepath.Join(destBasePath, filenode.Path), def.Epochwhen, filenode.Info.ModTime()); err != nil {
				return err
			}
		}
		return nil
	}
	try.Do(func() {
		if err := fs.Walk(srcBasePath, preVisit, postVisit); err != nil {
			panic(err)
		}
	}).CatchAll(func(err error) {
		panic(Error.New("copyingplacer: io failed: %s", err))
	}).Done()
	return copyEmplacement{path: destBasePath}
}
func (t TarTransmat) Scan(
	kind integrity.TransmatKind,
	subjectPath string,
	siloURIs []integrity.SiloURI,
	options ...integrity.MaterializerConfigurer,
) integrity.CommitID {
	var commitID integrity.CommitID
	try.Do(func() {
		// Basic validation and config
		config := integrity.EvaluateConfig(options...)
		if kind != Kind {
			panic(errors.ProgrammerError.New("This transmat supports definitions of type %q, not %q", Kind, kind))
		}

		// If scan area doesn't exist, bail immediately.
		// No need to even start dialing warehouses if we've got nothing for 'em.
		_, err := os.Stat(subjectPath)
		if err != nil {
			if os.IsNotExist(err) {
				return // empty commitID
			} else {
				panic(err)
			}
		}

		// Open output streams for writing.
		// Since these are all behaving as just one `io.Writer` stream, this could maybe be factored out.
		// Error handling is currently "anything -> panic".  This should probably be more resilient.
		//  (That might need another refactor so we have an upload call per remote.)
		controllers := make([]StreamingWarehouseWriteController, 0)
		writers := make([]io.Writer, 0)
		for _, uri := range siloURIs {
			controller := makeWriteController(uri)
			controllers = append(controllers, controller)
			writers = append(writers, controller.Writer())
		}
		stream := io.MultiWriter(writers...)
		if len(writers) < 1 {
			stream = ioutil.Discard
		}

		// walk, fwrite, hash
		commitID = integrity.CommitID(Save(stream, subjectPath, config.FilterSet, hasherFactory))

		// commit
		for _, controller := range controllers {
			controller.Commit(commitID)
		}
	}).Catch(integrity.Error, func(err *errors.Error) {
		panic(err)
	}).CatchAll(func(err error) {
		panic(integrity.UnknownError.Wrap(err))
	}).Done()
	return commitID
}
// Run inputs
func ProvisionInputs(transmat integrity.Transmat, assemblerFn integrity.Assembler, inputs []def.Input, rootfs string, journal log15.Logger) integrity.Assembly {
	// start having all filesystems
	filesystems := make(map[def.Input]integrity.Arena, len(inputs))
	fsGather := make(chan map[def.Input]materializerReport)
	for _, in := range inputs {
		go func(in def.Input) {
			try.Do(func() {
				journal.Info(fmt.Sprintf("Starting materialize for %s hash=%s", in.Type, in.Hash))
				arena := transmat.Materialize(
					integrity.TransmatKind(in.Type),
					integrity.CommitID(in.Hash),
					[]integrity.SiloURI{integrity.SiloURI(in.URI)},
				)
				journal.Info(fmt.Sprintf("Finished materialize for %s hash=%s", in.Type, in.Hash))
				fsGather <- map[def.Input]materializerReport{
					in: {Arena: arena},
				}
			}).Catch(integrity.Error, func(err *errors.Error) {
				journal.Warn(fmt.Sprintf("Errored during materialize for %s hash=%s", in.Type, in.Hash), "error", err.Message())
				fsGather <- map[def.Input]materializerReport{
					in: {Err: err},
				}
			}).Done()
		}(in)
	}

	// (we don't have any output setup at this point, but if we do in the future, that'll be here.)

	// gather materialized inputs
	for range inputs {
		for in, report := range <-fsGather {
			if report.Err != nil {
				panic(report.Err)
			}
			filesystems[in] = report.Arena
		}
	}
	journal.Info("All inputs acquired... starting assembly")

	// assemble them into the final tree
	assemblyParts := make([]integrity.AssemblyPart, 0, len(filesystems))
	for input, arena := range filesystems {
		assemblyParts = append(assemblyParts, integrity.AssemblyPart{
			SourcePath: arena.Path(),
			TargetPath: input.Location,
			Writable:   true, // TODO input config should have a word about this
		})
	}
	assembly := assemblerFn(rootfs, assemblyParts)
	journal.Info("Assembly complete!")
	return assembly
}
// Executes a job, catching any panics.
func (e *Executor) Run(f def.Formula, j def.Job, d string, stdin io.Reader, outS, errS io.WriteCloser, journal log15.Logger) def.JobResult {
	r := def.JobResult{
		ID:       j.Id(),
		ExitCode: -1,
	}
	try.Do(func() {
		e.Execute(f, j, d, &r, outS, errS, journal)
	}).Catch(executor.Error, func(err *errors.Error) {
		r.Error = err
	}).Catch(integrity.Error, func(err *errors.Error) {
		r.Error = err
	}).CatchAll(func(err error) {
		r.Error = executor.UnknownError.Wrap(err).(*errors.Error)
	}).Done()
	return r
}
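// exampleRun is a hedged sketch of driving Run and inspecting the JobResult;
// the `exec`, `formula`, `job`, and workDir values are hypothetical placeholders,
// not part of the source above.
func exampleRun(exec *Executor, formula def.Formula, job def.Job, workDir string, journal log15.Logger) {
	result := exec.Run(formula, job, workDir, os.Stdin, os.Stdout, os.Stderr, journal)
	if result.Error != nil {
		// error classes assigned by Run's Catch clauses can be inspected by class:
		if errors.GetClass(result.Error).Is(integrity.Error) {
			journal.Warn("data transport failed", "error", result.Error.Message())
		} else {
			journal.Warn("execution failed", "error", result.Error.Message())
		}
		return
	}
	journal.Info(fmt.Sprintf("job %s exited with code %d", result.ID, result.ExitCode))
}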
func (t GitTransmat) Scan(
	kind integrity.TransmatKind,
	subjectPath string,
	siloURIs []integrity.SiloURI,
	options ...integrity.MaterializerConfigurer,
) integrity.CommitID {
	var commitID integrity.CommitID
	try.Do(func() {
		// Basic validation and config
		//config := integrity.EvaluateConfig(options...)
		if kind != Kind {
			panic(errors.ProgrammerError.New("This transmat supports definitions of type %q, not %q", Kind, kind))
		}

		// Get off my lawn.
		panic(errors.NotImplementedError.New("The git transmat does not support scan."))
	}).Catch(integrity.Error, func(err *errors.Error) {
		panic(err)
	}).CatchAll(func(err error) {
		panic(integrity.UnknownError.Wrap(err))
	}).Done()
	return commitID
}
/*
	'actual' should be a `func()`; 'expected' should be an `*errors.ErrorClass`;
	we'll run the function, and check that it panics, and that the error is
	under the umbrella of the error class.
*/
func ShouldPanicWith(actual interface{}, expected ...interface{}) string {
	fn, ok := actual.(func())
	if !ok {
		return fmt.Sprintf("You must provide a `func()` as the first argument to this assertion; got `%T`", actual)
	}
	var errClass *errors.ErrorClass
	switch len(expected) {
	case 0:
		return "You must provide a spacemonkey `ErrorClass` as the expectation parameter to this assertion."
	case 1:
		cls, ok := expected[0].(*errors.ErrorClass)
		if !ok {
			return "You must provide a spacemonkey `ErrorClass` as the expectation parameter to this assertion."
		}
		errClass = cls
	default:
		return "You must provide one parameter as an expectation to this assertion."
	}

	var caught error
	try.Do(
		fn,
	).CatchAll(func(err error) {
		caught = err
	}).Done()

	if caught == nil {
		return fmt.Sprintf("Expected error to be of class %q but no error was raised!", errClass.String())
	}
	spaceClass := errors.GetClass(caught)
	if spaceClass.Is(errClass) {
		return ""
	}
	return fmt.Sprintf("Expected error to be of class %q but it had %q instead! (Full message: %s)", errClass.String(), spaceClass.String(), caught.Error())
}
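// Hedged usage sketch in a GoConvey test: the Convey/So harness and the
// `transmat` under test are assumptions; ShouldPanicWith itself is defined above.
func TestMaterializeRejectsUnknownScheme(t *testing.T) {
	Convey("Materialize should reject unrecognized URI schemes", t, func() {
		So(func() {
			transmat.Materialize(Kind, "somehash", []integrity.SiloURI{"bogus://nowhere"})
		}, ShouldPanicWith, integrity.ConfigError)
	})
}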
/*
	Arenas produced by S3 Transmats may be relocated by simple `mv`.
*/
func (t *S3Transmat) Materialize(
	kind integrity.TransmatKind,
	dataHash integrity.CommitID,
	siloURIs []integrity.SiloURI,
	options ...integrity.MaterializerConfigurer,
) integrity.Arena {
	var arena dirArena
	try.Do(func() {
		// Basic validation and config
		config := integrity.EvaluateConfig(options...)
		if kind != Kind {
			panic(errors.ProgrammerError.New("This transmat supports definitions of type %q, not %q", Kind, kind))
		}

		// Parse URI; Find warehouses.
		if len(siloURIs) < 1 {
			panic(integrity.ConfigError.New("Materialization requires at least one data source!"))
			// Note that it's possible a caching layer will satisfy things even without data sources...
			// but if that was going to happen, it already would have by now.
		}
		// Our policy is to take the first path that exists.
		//  This lets you specify a series of potential locations, and if one is unavailable we'll just take the next.
		var warehouseBucketName string
		var warehousePathPrefix string
		var warehouseCtntAddr bool
		for _, givenURI := range siloURIs {
			u, err := url.Parse(string(givenURI))
			if err != nil {
				panic(integrity.ConfigError.New("failed to parse URI: %s", err))
			}
			warehouseBucketName = u.Host
			warehousePathPrefix = u.Path
			switch u.Scheme {
			case "s3":
				warehouseCtntAddr = false
			case "s3+splay":
				warehouseCtntAddr = true
			default:
				panic(integrity.ConfigError.New("unrecognized scheme: %q", u.Scheme))
			}
			// TODO figure out how to check for data (or at least warehouse!) presence;
			//  currently just assuming the first one's golden, and blowing up later if it's not.
			break
		}
		if warehouseBucketName == "" {
			panic(integrity.WarehouseConnectionError.New("No warehouses were available!"))
		}

		// load keys from env
		// TODO someday URIs should grow smart enough to control this in a more general fashion -- but for now, host ENV is actually pretty feasible and plays easily with others.
		// TODO should not require keys!  we're just reading, after all; anon access is 100% valid.
		//  Buuuuut s3gof3r doesn't seem to understand empty keys; it still sends them as if to login, and AWS says 403.  So, foo.
		keys, err := s3gof3r.EnvKeys()
		if err != nil {
			panic(S3CredentialsMissingError.Wrap(err))
		}

		// initialize reader from s3!
		getPath := warehousePathPrefix
		if warehouseCtntAddr {
			getPath = path.Join(warehousePathPrefix, string(dataHash))
		}
		s3reader := makeS3reader(warehouseBucketName, getPath, keys)
		defer s3reader.Close()

		// prepare decompression as necessary
		reader, err := tartrans.Decompress(s3reader)
		if err != nil {
			panic(integrity.WarehouseConnectionError.New("could not start decompressing: %s", err))
		}
		tarReader := tar.NewReader(reader)

		// Create staging arena to produce data into.
		arena.path, err = ioutil.TempDir(t.workPath, "")
		if err != nil {
			panic(integrity.TransmatError.New("Unable to create arena: %s", err))
		}

		// walk input tar stream, placing data and accumulating hashes and metadata for integrity check
		bucket := &fshash.MemoryBucket{}
		tartrans.Extract(tarReader, arena.Path(), bucket, hasherFactory)

		// bucket processing may have created a root node if missing.  if so, we need to apply its props.
		fs.PlaceFile(arena.Path(), bucket.Root().Metadata, nil)

		// hash whole tree
		actualTreeHash := fshash.Hash(bucket, hasherFactory)

		// verify total integrity
		expectedTreeHash, err := base64.URLEncoding.DecodeString(string(dataHash))
		if err != nil {
			panic(integrity.ConfigError.New("Could not parse hash: %s", err))
		}
		if bytes.Equal(actualTreeHash, expectedTreeHash) {
			// excellent, got what we asked for.
			arena.hash = dataHash
		} else {
			// this may or may not be grounds for panic, depending on configuration.
			if config.AcceptHashMismatch {
				// if we're tolerating mismatches, report the actual hash through different mechanisms.
				// you probably only ever want to use this in tests or debugging; in prod it's just asking for insanity.
				arena.hash = integrity.CommitID(actualTreeHash)
			} else {
				panic(integrity.NewHashMismatchError(string(dataHash), base64.URLEncoding.EncodeToString(actualTreeHash)))
			}
		}
	}).Catch(integrity.Error, func(err *errors.Error) {
		panic(err)
	}).CatchAll(func(err error) {
		panic(integrity.UnknownError.Wrap(err))
	}).Done()
	return arena
}
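// exampleS3Materialize is a hedged sketch of a Materialize call showing the two
// URI schemes this transmat recognizes; bucket names, prefixes, and the hash
// are placeholders.  (Per the TODO in the loop above, the current code simply
// takes the first URI and breaks, so later entries are not yet a real fallback.)
func exampleS3Materialize(transmat *S3Transmat) {
	arena := transmat.Materialize(
		Kind,
		integrity.CommitID("aGVsbG8"), // URL-safe base64 tree hash expected at the far end
		[]integrity.SiloURI{
			"s3+splay://my-bucket/warehouse/",     // content-addressed: object key becomes warehouse/<dataHash>
			"s3://my-bucket/snapshots/rootfs.tgz", // direct: object key used as-is
		},
	)
	fmt.Println("materialized at", arena.Path())
}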
// Execute a formula in a specified directory. MAY PANIC.
func (e *Executor) Execute(f def.Formula, j def.Job, d string, result *def.JobResult, outS, errS io.WriteCloser, journal log15.Logger) {
	// Dedicated rootfs folder to distinguish container from nsinit noise
	rootfs := filepath.Join(d, "rootfs")

	// nsinit wants to have a logfile
	logFile := filepath.Join(d, "nsinit-debug.log")

	// Prep command
	args := []string{}

	// Global options:
	//  --root will place the 'nsinit' folder (holding a state.json file) in d
	//  --log-file does much the same with a log file (unsure if care?)
	//  --debug allegedly enables debug output in the log file
	args = append(args, "--root", d, "--log-file", logFile, "--debug")

	// Subcommand, and tell nsinit to not desire a JSON file (instead just use many flergs)
	args = append(args, "exec", "--create")

	// Use the host's networking (no bridge, no namespaces, etc)
	args = append(args, "--net=host")

	// Where our system image exists
	args = append(args, "--rootfs", rootfs)

	// Set cwd
	args = append(args, "--cwd", f.Accents.Cwd)

	// Add all desired environment variables
	for k, v := range f.Accents.Env {
		args = append(args, "--env", k+"="+v)
	}

	// Unroll command args
	args = append(args, f.Accents.Entrypoint...)

	// Prepare command to exec
	cmd := exec.Command("nsinit", args...)
	cmd.Stdin = nil
	cmd.Stdout = outS
	cmd.Stderr = errS

	// Prepare filesystem
	transmat := util.DefaultTransmat()
	assembly := util.ProvisionInputs(
		transmat,
		util.BestAssembler(),
		f.Inputs, rootfs, journal,
	)
	defer assembly.Teardown() // Whatever happens: disassemble filesystem
	util.ProvisionOutputs(f.Outputs, rootfs, journal)

	// launch execution.
	// transform gosh's typed errors to repeatr's hierarchical errors.
	// this is... not untroubled code: since we're invoking a helper that's then
	//  proxying the exec even further, most errors are fatal (the mapping here is
	//  very different than in e.g. chroot executor, and provides much less meaning).
	var proc gosh.Proc
	try.Do(func() {
		proc = gosh.ExecProcCmd(cmd)
	}).CatchAll(func(err error) {
		switch err.(type) {
		case gosh.NoSuchCommandError:
			panic(executor.ConfigError.New("nsinit binary is missing"))
		case gosh.NoArgumentsError:
			panic(executor.UnknownError.Wrap(err))
		case gosh.NoSuchCwdError:
			panic(executor.UnknownError.Wrap(err))
		case gosh.ProcMonitorError:
			panic(executor.TaskExecError.Wrap(err))
		default:
			panic(executor.UnknownError.Wrap(err))
		}
	}).Done()

	// Wait for the job to complete
	// REVIEW: consider exposing `gosh.Proc`'s interface as part of repeatr's job tracking api?
	result.ExitCode = proc.GetExitCode()

	// Horrifyingly ambiguous attempts to detect failure modes from inside nsinit.
	// This can only be made correct by pushing patches into nsinit to use another channel
	//  for control data reporting that is completely separated from user data flows.
	// (Or, arguably, putting another layer of control processes as the first parent inside
	//  nsinit, but that's duct tape within a duct tape mesh; let's not.)
	// Certain program outputs may be incorrectly attributed as launch failure, though this
	//  should be... "unlikely".
	// Also note that if we ever switch to non-blocking execution, this will become even more
	//  of a mess: we won't be able to tell if exec failed, esp. in the case of e.g. a long
	//  running process with no output, and so we won't know when it's safe to return.
	// TODO handle the following leading strings:
	//  - "exec: \"%s\": executable file not found in $PATH\n"
	//  - "no such file or directory\n"
	// this will probably require rejiggering a whole bunch of stuff so that the streamer is reachable down here.

	// Save outputs
	result.Outputs = util.PreserveOutputs(transmat, f.Outputs, rootfs, journal)
}
/*
	Arenas produced by Tar-Exec Transmats may be relocated by simple `mv`.
*/
func (t *TarExecTransmat) Materialize(
	kind integrity.TransmatKind,
	dataHash integrity.CommitID,
	siloURIs []integrity.SiloURI,
	options ...integrity.MaterializerConfigurer,
) integrity.Arena {
	var arena dirArena
	try.Do(func() {
		// Basic validation and config
		if !(kind == Kind || kind == "exec-tar") {
			panic(errors.ProgrammerError.New("This transmat supports definitions of type %q, not %q", Kind, kind))
		}

		// Ping silos
		if len(siloURIs) < 1 {
			panic(integrity.ConfigError.New("Materialization requires at least one data source!"))
			// Note that it's possible a caching layer will satisfy things even without data sources...
			// but if that was going to happen, it already would have by now.
		}
		// Our policy is to take the first path that exists.
		//  This lets you specify a series of potential locations, and if one is unavailable we'll just take the next.
		var siloURI integrity.SiloURI
		for _, givenURI := range siloURIs {
			// TODO still assuming all local paths and not doing real uri parsing
			localPath := string(givenURI)
			_, err := os.Stat(localPath)
			if os.IsNotExist(err) {
				// TODO it'd be awfully lovely if we could log the attempt somewhere
				continue
			}
			siloURI = givenURI
			break
		}
		if siloURI == "" {
			panic(integrity.WarehouseConnectionError.New("No warehouses were available!"))
		}

		// Open the input stream; preparing decompression as necessary
		file, err := os.OpenFile(string(siloURI), os.O_RDONLY, 0755)
		if err != nil {
			panic(integrity.WarehouseConnectionError.New("Unable to read file: %s", err))
		}
		file.Close() // just checking, so we can (try to) give a more pleasant error than tar barf

		// Create staging arena to produce data into.
		arena.path, err = ioutil.TempDir(t.workPath, "")
		if err != nil {
			panic(integrity.TransmatError.New("Unable to create arena: %s", err))
		}

		// exec tar.
		//  in case of a zero (a.k.a. success) exit, this returns silently.
		//  in case of a non-zero exit, this panics; the panic will include the output.
		gosh.Gosh(
			"tar",
			"-xf", string(siloURI),
			"-C", arena.Path(),
			gosh.NullIO,
		).RunAndReport()

		// note: indeed, we never check the hash field.  this is *not* a compliant implementation of an input.
	}).Catch(integrity.Error, func(err *errors.Error) {
		panic(err)
	}).CatchAll(func(err error) {
		panic(integrity.UnknownError.Wrap(err))
	}).Done()
	return arena
}
func (t TarExecTransmat) Scan(
	kind integrity.TransmatKind,
	subjectPath string,
	siloURIs []integrity.SiloURI,
	options ...integrity.MaterializerConfigurer,
) integrity.CommitID {
	try.Do(func() {
		// Basic validation and config
		if kind != Kind {
			panic(errors.ProgrammerError.New("This transmat supports definitions of type %q, not %q", Kind, kind))
		}

		// If scan area doesn't exist, bail immediately.
		// No need to even start dialing warehouses if we've got nothing for 'em.
		_, err := os.Stat(subjectPath)
		if err != nil {
			if os.IsNotExist(err) {
				return // empty commitID
			} else {
				panic(err)
			}
		}

		// Parse save locations.
		//  (Most transmats do... significantly smarter things than this backwater.)
		var localPath string
		if len(siloURIs) == 0 {
			localPath = "/dev/null"
		} else if len(siloURIs) == 1 {
			// TODO still assuming all local paths and not doing real uri parsing
			localPath = string(siloURIs[0])
			err := os.MkdirAll(filepath.Dir(localPath), 0755)
			if err != nil {
				panic(integrity.WarehouseConnectionError.New("Unable to write file: %s", err))
			}
			file, err := os.OpenFile(localPath, os.O_CREATE|os.O_WRONLY, 0644)
			if err != nil {
				panic(integrity.WarehouseConnectionError.New("Unable to write file: %s", err))
			}
			file.Close() // just checking, so we can (try to) give a more pleasant error than tar barf
		} else {
			panic(integrity.ConfigError.New("%s transmat only supports shipping to 1 warehouse", Kind))
		}

		// exec tar.
		//  in case of a zero (a.k.a. success) exit, this returns silently.
		//  in case of a non-zero exit, this panics; the panic will include the output.
		gosh.Gosh(
			"tar",
			"-cf", localPath,
			"--xform", "s,"+strings.TrimLeft(subjectPath, "/")+",.,",
			subjectPath,
			gosh.NullIO,
		).RunAndReport()
	}).Catch(integrity.Error, func(err *errors.Error) {
		panic(err)
	}).CatchAll(func(err error) {
		panic(integrity.UnknownError.Wrap(err))
	}).Done()
	return ""
}
/*
	Git transmats plonk down the contents of one commit (or tree) as a filesystem.

	A fileset materialized by git does *not* include the `.git` dir by default,
	since those files are not themselves part of what's described by the hash.

	Git effectively "filters" out several attributes -- permissions are only
	loosely respected (execution only), file timestamps are undefined, uid/gid
	bits are not tracked, xattrs are not tracked, etc.  If you desire defined
	values, *you must still configure materialization to use a filter*
	(particularly for file timestamps, since they will otherwise be allowed to
	vary from one materialization to the next(!)).

	Git also allows for several other potential pitfalls with lossless data
	transmission: git cannot transmit empty directories.  This can be a major
	pain.  Typical workarounds include creating a ".gitkeep" file in the empty
	directory.  Gitignore files may also inadvertently cause trouble.
	Transmat.Materialize will act *consistently*, but it does not overcome
	these issues in git (doing so would require additional metadata or
	protocol extensions).

	This transmat is *not* currently well optimized, and should generally be
	assumed to be re-cloning on all materializations -- specifically, it is not
	smart enough to recognize requests for different commits and trees from the
	same repos in order to save reclones.
*/
func (t *GitTransmat) Materialize(
	kind integrity.TransmatKind,
	dataHash integrity.CommitID,
	siloURIs []integrity.SiloURI,
	options ...integrity.MaterializerConfigurer,
) integrity.Arena {
	var arena gitArena
	try.Do(func() {
		// Basic validation and config
		//config := integrity.EvaluateConfig(options...)
		if kind != Kind {
			panic(errors.ProgrammerError.New("This transmat supports definitions of type %q, not %q", Kind, kind))
		}

		// Ping silos
		if len(siloURIs) < 1 {
			panic(integrity.ConfigError.New("Materialization requires at least one data source!"))
			// Note that it's possible a caching layer will satisfy things even without data sources...
			// but if that was going to happen, it already would have by now.
		}
		// Our policy is to take the first path that exists.
		//  This lets you specify a series of potential locations.
		var siloURI integrity.SiloURI
		for _, givenURI := range siloURIs {
			// shell out to git and ask it if it thinks there's a repo here
			// TODO this and all future shellouts do NOT SUFFICIENTLY ISOLATE either config or secret keeping yet.
			// TODO it's probably not productive to try to parse all git uris, but we should detect relative local fs paths and shitcan them at least
			localPath := string(givenURI)
			// TODO there's no "--" in ls-remote, so... we should forbid things starting in "-", i guess?
			//  or use "file://" religiously?  but no, bc ssh doesn't look like "ssh://" all the time...
			//  ugh, i do not want to write a git url parser.
			//  update: yeah, using "file://" religiously is not an option.  this actually takes a
			//  *different* path than `/non/protocol/prefixed`.  not significantly, but it may impact
			//  e.g. hardlinking, iiuc.
			// TODO someday go for the usability buff of parsing git errors into something more helpful
			code := git.Bake(
				"ls-remote", localPath,
				gosh.Opts{OkExit: []int{0, 128}},
			).RunAndReport().GetExitCode()
			// code 128 means no connection.
			// any other code we currently panic on (with stderr attached, but it's still ugly).
			if code != 0 {
				continue
			}
			siloURI = givenURI
			break
		}
		if siloURI == "" {
			panic(integrity.WarehouseConnectionError.New("No warehouses were available!"))
		}

		// Create staging arena to produce data into.
		var err error
		arena.gitDirPath, err = ioutil.TempDir(t.workPath, "")
		if err != nil {
			panic(integrity.TransmatError.New("Unable to create arena: %s", err))
		}
		arena.workDirPath, err = ioutil.TempDir(t.workPath, "")
		if err != nil {
			panic(integrity.TransmatError.New("Unable to create arena: %s", err))
		}
		// From now on, all our git commands will have these overridden paths:
		//  this gives us a working tree without ".git".
		git := git.Bake(
			gosh.Opts{Env: map[string]string{
				"GIT_DIR":       arena.gitDirPath,
				"GIT_WORK_TREE": arena.workDirPath,
			}},
		)

		// Clone!
		// TODO make sure all the check hard modes are enabled
		git.Bake(
			"clone", "--bare", "--", string(siloURI), arena.gitDirPath,
		).RunAndReport()

		// Checkout the interesting commit.
		buf := &bytes.Buffer{}
		p := git.Bake(
			"checkout", string(dataHash), // FIXME dear god, whitelist this to make sure it looks like a hash.
			gosh.Opts{Cwd: arena.workDirPath},
			gosh.Opts{OkExit: gosh.AnyExit},
			gosh.Opts{Err: buf, Out: buf},
		).Run()
		if bytes.HasPrefix(buf.Bytes(), []byte("fatal: reference is not a tree: ")) {
			panic(integrity.DataDNE.New("hash %q not found in this repo", dataHash))
		}
		if p.GetExitCode() != 0 {
			// catchall.
			// this formatting is *terrible*, but we don't have a good formatter for using datakeys, either, so.
			// (blowing past this without too much fuss because we're going to switch error libraries later and it's going to fix this better.)
			panic(Error.New("git checkout failed.  git output:\n%s", buf.String()))
		}

		// And, do submodules.
		git.Bake(
			"submodule", "update", "--init",
			gosh.Opts{Cwd: arena.workDirPath},
		).RunAndReport()

		// verify total integrity
		//  actually this is a nil step; there's no such thing as "acceptHashMismatch", clone would have simply failed
		arena.hash = dataHash
	}).Catch(integrity.Error, func(err *errors.Error) {
		panic(err)
	}).CatchAll(func(err error) {
		panic(integrity.UnknownError.Wrap(err))
	}).Done()
	return arena
}
/*
	Arenas produced by Dir Transmats may be relocated by simple `mv`.
*/
func (t *DirTransmat) Materialize(
	kind integrity.TransmatKind,
	dataHash integrity.CommitID,
	siloURIs []integrity.SiloURI,
	options ...integrity.MaterializerConfigurer,
) integrity.Arena {
	var arena dirArena
	try.Do(func() {
		// Basic validation and config
		config := integrity.EvaluateConfig(options...)
		if kind != Kind {
			panic(errors.ProgrammerError.New("This transmat supports definitions of type %q, not %q", Kind, kind))
		}

		// Ping silos
		if len(siloURIs) < 1 {
			panic(integrity.ConfigError.New("Materialization requires at least one data source!"))
			// Note that it's possible a caching layer will satisfy things even without data sources...
			// but if that was going to happen, it already would have by now.
		}
		// Our policy is to take the first path that exists.
		//  This lets you specify a series of potential locations.
		var siloURI integrity.SiloURI
		for _, givenURI := range siloURIs {
			// TODO still assuming all local paths and not doing real uri parsing
			localPath := string(givenURI)
			_, err := os.Stat(localPath)
			if os.IsNotExist(err) {
				continue
			}
			siloURI = givenURI
			break
		}
		if siloURI == "" {
			panic(integrity.WarehouseConnectionError.New("No warehouses were available!"))
		}

		// Create staging arena to produce data into.
		var err error
		arena.path, err = ioutil.TempDir(t.workPath, "")
		if err != nil {
			panic(integrity.TransmatError.New("Unable to create arena: %s", err))
		}

		// walk filesystem, copying and accumulating data for integrity check
		hasherFactory := sha512.New384
		bucket := &fshash.MemoryBucket{}
		localPath := string(siloURI)
		if err := fshash.FillBucket(localPath, arena.Path(), bucket, filter.FilterSet{}, hasherFactory); err != nil {
			panic(err)
		}

		// hash whole tree
		actualTreeHash := fshash.Hash(bucket, hasherFactory)

		// verify total integrity
		expectedTreeHash, err := base64.URLEncoding.DecodeString(string(dataHash))
		if err != nil {
			panic(integrity.ConfigError.New("Could not parse hash: %s", err))
		}
		if bytes.Equal(actualTreeHash, expectedTreeHash) {
			// excellent, got what we asked for.
			arena.hash = dataHash
		} else {
			// this may or may not be grounds for panic, depending on configuration.
			if config.AcceptHashMismatch {
				// if we're tolerating mismatches, report the actual hash through different mechanisms.
				// you probably only ever want to use this in tests or debugging; in prod it's just asking for insanity.
				arena.hash = integrity.CommitID(actualTreeHash)
			} else {
				panic(integrity.NewHashMismatchError(string(dataHash), base64.URLEncoding.EncodeToString(actualTreeHash)))
			}
		}
	}).Catch(integrity.Error, func(err *errors.Error) {
		panic(err)
	}).CatchAll(func(err error) {
		panic(integrity.UnknownError.Wrap(err))
	}).Done()
	return arena
}
func (t DirTransmat) Scan(
	kind integrity.TransmatKind,
	subjectPath string,
	siloURIs []integrity.SiloURI,
	options ...integrity.MaterializerConfigurer,
) integrity.CommitID {
	var commitID integrity.CommitID
	try.Do(func() {
		// Basic validation and config
		config := integrity.EvaluateConfig(options...)
		if kind != Kind {
			panic(errors.ProgrammerError.New("This transmat supports definitions of type %q, not %q", Kind, kind))
		}

		// If scan area doesn't exist, bail immediately.
		// No need to even start dialing warehouses if we've got nothing for 'em.
		_, err := os.Stat(subjectPath)
		if err != nil {
			if os.IsNotExist(err) {
				return // empty commitID
			} else {
				panic(err)
			}
		}

		// Parse save locations.
		//  This transmat only supports one output location at a time due
		//  to old code we haven't invested in refactoring yet.
		var localPath string
		if len(siloURIs) == 0 {
			localPath = "" // empty string is a well-known value to `fshash.FillBucket`: means just hash, don't copy.
		} else if len(siloURIs) == 1 {
			// TODO still assuming all local paths and not doing real uri parsing
			localPath = string(siloURIs[0])
			err := os.MkdirAll(filepath.Dir(localPath), 0755)
			if err != nil {
				panic(integrity.WarehouseConnectionError.New("Unable to write file: %s", err))
			}
		} else {
			panic(integrity.ConfigError.New("%s transmat only supports shipping to 1 warehouse", Kind))
		}

		// walk filesystem, copying and accumulating data for integrity check
		bucket := &fshash.MemoryBucket{}
		err = fshash.FillBucket(subjectPath, localPath, bucket, config.FilterSet, hasherFactory)
		if err != nil {
			panic(err) // TODO this is not well typed, and does not clearly indicate whether scanning or committing had the problem
		}

		// hash whole tree
		actualTreeHash := fshash.Hash(bucket, hasherFactory)

		// report
		commitID = integrity.CommitID(base64.URLEncoding.EncodeToString(actualTreeHash))
	}).Catch(integrity.Error, func(err *errors.Error) {
		panic(err)
	}).CatchAll(func(err error) {
		panic(integrity.UnknownError.Wrap(err))
	}).Done()
	return commitID
}
// Run inputs
func ProvisionInputs(transmat integrity.Transmat, assemblerFn integrity.Assembler, inputs []def.Input, rootfs string, journal log15.Logger) integrity.Assembly {
	// start having all filesystems
	// input names are used as keys, so must be unique
	inputsByName := make(map[string]def.Input, len(inputs))
	for _, in := range inputs {
		// TODO checks should also be sooner, up in cfg parse
		//  but this check is for programmatic access as well (errors down the line can get nonobvious if you skip this).
		if _, ok := inputsByName[in.Name]; ok {
			panic(errors.ProgrammerError.New("duplicate name in input config"))
		}
		inputsByName[in.Name] = in
	}
	filesystems := make(map[string]integrity.Arena, len(inputs))
	fsGather := make(chan map[string]materializerReport)
	for _, in := range inputs {
		go func(in def.Input) {
			try.Do(func() {
				journal.Info(fmt.Sprintf("Starting materialize for %s hash=%s", in.Type, in.Hash))
				// todo: create validity checking api for URIs, check them all before launching anything
				warehouses := make([]integrity.SiloURI, len(in.Warehouses))
				for i, wh := range in.Warehouses {
					warehouses[i] = integrity.SiloURI(wh)
				}
				// invoke transmat (blocking, potentially long time)
				arena := transmat.Materialize(
					integrity.TransmatKind(in.Type),
					integrity.CommitID(in.Hash),
					warehouses,
				)
				// submit report
				journal.Info(fmt.Sprintf("Finished materialize for %s hash=%s", in.Type, in.Hash))
				fsGather <- map[string]materializerReport{
					in.Name: {Arena: arena},
				}
			}).Catch(integrity.Error, func(err *errors.Error) {
				journal.Warn(fmt.Sprintf("Errored during materialize for %s hash=%s", in.Type, in.Hash), "error", err.Message())
				fsGather <- map[string]materializerReport{
					in.Name: {Err: err},
				}
			}).Done()
		}(in)
	}

	// (we don't have any output setup at this point, but if we do in the future, that'll be here.)

	// gather materialized inputs
	for range inputs {
		for name, report := range <-fsGather {
			if report.Err != nil {
				panic(report.Err)
			}
			filesystems[name] = report.Arena
		}
	}
	journal.Info("All inputs acquired... starting assembly")

	// assemble them into the final tree
	assemblyParts := make([]integrity.AssemblyPart, 0, len(filesystems))
	for name, arena := range filesystems {
		assemblyParts = append(assemblyParts, integrity.AssemblyPart{
			SourcePath: arena.Path(),
			TargetPath: inputsByName[name].MountPath,
			Writable:   true, // TODO input config should have a word about this
		})
	}
	assembly := assemblerFn(rootfs, assemblyParts)
	journal.Info("Assembly complete!")
	return assembly
}
// Run outputs
func PreserveOutputs(transmat integrity.Transmat, outputs []def.Output, rootfs string, journal log15.Logger) []def.Output {
	// run commit on the outputs
	scanGather := make(chan scanReport)
	for _, out := range outputs {
		go func(out def.Output) {
			filterOptions := make([]integrity.MaterializerConfigurer, 0, 3)
			out.Filters.InitDefaultsOutput()
			switch out.Filters.UidMode {
			case def.FilterKeep: // easy, just no filter.
			case def.FilterUse:
				f := filter.UidFilter{out.Filters.Uid}
				filterOptions = append(filterOptions, integrity.UseFilter(f))
			default:
				panic(errors.ProgrammerError.New("unhandled filter mode %v", out.Filters.UidMode))
			}
			switch out.Filters.GidMode {
			case def.FilterKeep: // easy, just no filter.
			case def.FilterUse:
				f := filter.GidFilter{out.Filters.Gid}
				filterOptions = append(filterOptions, integrity.UseFilter(f))
			default:
				panic(errors.ProgrammerError.New("unhandled filter mode %v", out.Filters.GidMode))
			}
			switch out.Filters.MtimeMode {
			case def.FilterKeep: // easy, just no filter.
			case def.FilterUse:
				f := filter.MtimeFilter{out.Filters.Mtime}
				filterOptions = append(filterOptions, integrity.UseFilter(f))
			default:
				panic(errors.ProgrammerError.New("unhandled filter mode %v", out.Filters.MtimeMode))
			}

			scanPath := filepath.Join(rootfs, out.MountPath)
			journal.Info(fmt.Sprintf("Starting scan on %q", scanPath))
			try.Do(func() {
				// todo: create validity checking api for URIs, check them all before launching anything
				warehouses := make([]integrity.SiloURI, len(out.Warehouses))
				for i, wh := range out.Warehouses {
					warehouses[i] = integrity.SiloURI(wh)
				}
				// invoke transmat (blocking, potentially long time)
				commitID := transmat.Scan(
					integrity.TransmatKind(out.Type),
					scanPath,
					warehouses,
					filterOptions...,
				)
				out.Hash = string(commitID)
				// submit report
				journal.Info(fmt.Sprintf("Finished scan on %q", scanPath))
				scanGather <- scanReport{Output: out}
			}).Catch(integrity.Error, func(err *errors.Error) {
				journal.Warn(fmt.Sprintf("Errored scan on %q", scanPath), "error", err.Message())
				scanGather <- scanReport{Err: err}
			}).Done()
		}(out)
	}

	// gather reports
	var results []def.Output
	for range outputs {
		report := <-scanGather
		if report.Err != nil {
			panic(report.Err)
		}
		results = append(results, report.Output)
	}
	return results
}
// Run outputs
func PreserveOutputs(transmat integrity.Transmat, outputs []def.Output, rootfs string, journal log15.Logger) []def.Output {
	// run commit on the outputs
	scanGather := make(chan scanReport)
	for _, out := range outputs {
		go func(out def.Output) {
			filterOptions := make([]integrity.MaterializerConfigurer, 0, 4)
			for _, name := range out.Filters {
				cfg := strings.Fields(name)
				switch cfg[0] {
				case "uid":
					f := filter.UidFilter{}
					if len(cfg) > 1 {
						f.Value, _ = strconv.Atoi(cfg[1]) // (parse errors are currently ignored)
					}
					filterOptions = append(filterOptions, integrity.UseFilter(f))
				case "gid":
					f := filter.GidFilter{}
					if len(cfg) > 1 {
						f.Value, _ = strconv.Atoi(cfg[1]) // (parse errors are currently ignored)
					}
					filterOptions = append(filterOptions, integrity.UseFilter(f))
				case "mtime":
					f := filter.MtimeFilter{}
					if len(cfg) > 1 {
						f.Value, _ = time.Parse(time.RFC3339, cfg[1]) // (parse errors are currently ignored)
					}
					filterOptions = append(filterOptions, integrity.UseFilter(f))
				default:
					continue // unrecognized filter names are skipped silently
				}
			}
			scanPath := filepath.Join(rootfs, out.Location)
			journal.Info(fmt.Sprintf("Starting scan on %q", scanPath))
			try.Do(func() {
				// TODO: following is hack; badly need to update config parsing to understand this first-class
				warehouseCoordsList := make([]integrity.SiloURI, 0)
				if out.URI != "" {
					warehouseCoordsList = append(warehouseCoordsList, integrity.SiloURI(out.URI))
				}
				// invoke transmat
				commitID := transmat.Scan(
					integrity.TransmatKind(out.Type),
					scanPath,
					warehouseCoordsList,
					filterOptions...,
				)
				out.Hash = string(commitID)
				journal.Info(fmt.Sprintf("Finished scan on %q", scanPath))
				scanGather <- scanReport{Output: out}
			}).Catch(integrity.Error, func(err *errors.Error) {
				journal.Warn(fmt.Sprintf("Errored scan on %q", scanPath), "error", err.Message())
				scanGather <- scanReport{Err: err}
			}).Done()
		}(out)
	}

	// gather reports
	var results []def.Output
	for range outputs {
		report := <-scanGather
		if report.Err != nil {
			panic(report.Err)
		}
		results = append(results, report.Output)
	}
	return results
}
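// exampleOutput is a hedged sketch of the filter strings the parser above
// accepts in this version of the config; the literal values (paths, URI, ids)
// are illustrative, with field names taken from the code above.
var exampleOutput = def.Output{
	Type:     "tar",
	Location: "/var/output",
	URI:      "file:///tmp/output.tar", // hypothetical warehouse coordinate
	Filters: []string{
		"uid 1000",                   // force owner uid
		"gid 1000",                   // force owner gid
		"mtime 2010-01-01T00:00:00Z", // pin file times (RFC 3339)
	},
}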
func (t S3Transmat) Scan(
	kind integrity.TransmatKind,
	subjectPath string,
	siloURIs []integrity.SiloURI,
	options ...integrity.MaterializerConfigurer,
) integrity.CommitID {
	var commitID integrity.CommitID
	try.Do(func() {
		// Basic validation and config
		config := integrity.EvaluateConfig(options...)
		if kind != Kind {
			panic(errors.ProgrammerError.New("This transmat supports definitions of type %q, not %q", Kind, kind))
		}

		// If scan area doesn't exist, bail immediately.
		// No need to even start dialing warehouses if we've got nothing for 'em.
		_, err := os.Stat(subjectPath)
		if err != nil {
			if os.IsNotExist(err) {
				return // empty commitID
			} else {
				panic(err)
			}
		}

		// load keys from env
		// TODO someday URIs should grow smart enough to control this in a more general fashion -- but for now, host ENV is actually pretty feasible and plays easily with others.
		keys, err := s3gof3r.EnvKeys()
		if err != nil {
			panic(S3CredentialsMissingError.Wrap(err))
		}

		// Parse URI; Find warehouses; Open output streams for writing.
		// Since these are all behaving as just one `io.Writer` stream, this could maybe be factored out.
		// Error handling is currently "anything -> panic".  This should probably be more resilient.
		//  (That might need another refactor so we have an upload call per remote.)
		// TODO: both this and the tar code that has a similar single stream idea should use an interface,
		//  and that interface should have a concept of mv so we can make atomic commits.
		//  I'm not doing multiple URIs here until we get that, because the io.Writer interface just
		//  doesn't cut it like it did for tars (and really, it's ignoring a major issue to use it there, too).
		//  ...F**k it, we're gonna do it
		controllers := make([]*s3warehousePut, 0)
		writers := make([]io.Writer, 0) // this is dumb, but we end up making one of these to satisfy the type conversion for MultiWriter anyway
		for _, givenURI := range siloURIs {
			u, err := url.Parse(string(givenURI))
			if err != nil {
				panic(integrity.ConfigError.New("failed to parse URI: %s", err))
			}
			controller := &s3warehousePut{}
			controller.bucketName = u.Host
			controller.pathPrefix = u.Path
			var ctntAddr bool
			switch u.Scheme {
			case "s3":
				ctntAddr = false
			case "s3+splay":
				ctntAddr = true
			default:
				panic(integrity.ConfigError.New("unrecognized scheme: %q", u.Scheme))
			}

			// dial it and initialize writer to s3!
			// if the URI indicated splay behavior, first stream data to
			//  {$bucketName}:{dirname($storePath)}/.tmp.upload.{basename($storePath)}.{random()};
			//  this allows us to start uploading before the final hash is determined and relocate it later.
			// for direct paths, upload into place, because aws already manages atomicity at that scale
			//  (and they don't have a rename or copy operation that's free, because uh...? no time to implement it since 2006, apparently).
			controller.keys = keys
			if ctntAddr {
				controller.tmpPath = path.Join(
					path.Dir(controller.pathPrefix),
					".tmp.upload."+path.Base(controller.pathPrefix)+"."+guid.New(),
				)
				controller.stream = makeS3writer(controller.bucketName, controller.tmpPath, keys)
			} else {
				controller.stream = makeS3writer(controller.bucketName, controller.pathPrefix, keys)
			}
			controllers = append(controllers, controller)
			writers = append(writers, controller.stream)
		}
		stream := io.MultiWriter(writers...)
		if len(writers) < 1 {
			stream = ioutil.Discard
		}

		// walk, fwrite, hash
		commitID = integrity.CommitID(tartrans.Save(stream, subjectPath, config.FilterSet, hasherFactory))

		// commit
		for _, controller := range controllers {
			controller.Commit(string(commitID))
		}
	}).Catch(integrity.Error, func(err *errors.Error) {
		panic(err)
	}).CatchAll(func(err error) {
		panic(integrity.UnknownError.Wrap(err))
	}).Done()
	return commitID
}
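// objectKey is a hedged sketch of how the two schemes map a silo URI onto an
// object key; it mirrors the scheme switches above, but the helper itself is
// hypothetical, not part of the source.
func objectKey(u *url.URL, dataHash integrity.CommitID) string {
	switch u.Scheme {
	case "s3":
		// direct path: the object key is used exactly as given.
		return u.Path
	case "s3+splay":
		// content-addressed: the commit hash is appended under the prefix.
		return path.Join(u.Path, string(dataHash))
	default:
		panic(integrity.ConfigError.New("unrecognized scheme: %q", u.Scheme))
	}
}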
/*
	Arenas produced by Tar Transmats may be relocated by simple `mv`.
*/
func (t *TarTransmat) Materialize(
	kind integrity.TransmatKind,
	dataHash integrity.CommitID,
	siloURIs []integrity.SiloURI,
	options ...integrity.MaterializerConfigurer,
) integrity.Arena {
	var arena tarArena
	try.Do(func() {
		// Basic validation and config
		config := integrity.EvaluateConfig(options...)
		if kind != Kind {
			panic(errors.ProgrammerError.New("This transmat supports definitions of type %q, not %q", Kind, kind))
		}

		// Ping silos
		if len(siloURIs) < 1 {
			panic(integrity.ConfigError.New("Materialization requires at least one data source!"))
			// Note that it's possible a caching layer will satisfy things even without data sources...
			// but if that was going to happen, it already would have by now.
		}
		// Our policy is to take the first path that exists.
		//  This lets you specify a series of potential locations, and if one is unavailable we'll just take the next.
		var stream io.Reader
		for _, uri := range siloURIs {
			try.Do(func() {
				stream = makeReader(dataHash, uri)
			}).Catch(integrity.DataDNE, func(err *errors.Error) {
				// fine, we'll just try the next one
				// TODO LOGGING
			}).Catch(integrity.WarehouseConnectionError, func(err *errors.Error) {
				// ...this does kind of seem to indicate we should have "warehouse offline or DNE" be separate
				//  from "tcp flaked after we shook on it yo".
				// for now we consider both fatal.  revisit this when we get smarter logging, etc.
				panic(err)
			}).Done()
			if stream != nil {
				break
			}
		}
		if stream == nil {
			panic(integrity.WarehouseConnectionError.New("No warehouses were available!"))
		}

		// Wrap input stream with decompression as necessary
		reader, err := Decompress(stream)
		if err != nil {
			panic(integrity.WarehouseConnectionError.New("could not start decompressing: %s", err))
		}
		tarReader := tar.NewReader(reader)

		// Create staging arena to produce data into.
		arena.path, err = ioutil.TempDir(t.workPath, "")
		if err != nil {
			panic(integrity.TransmatError.New("Unable to create arena: %s", err))
		}

		// walk input tar stream, placing data and accumulating hashes and metadata for integrity check
		bucket := &fshash.MemoryBucket{}
		Extract(tarReader, arena.Path(), bucket, hasherFactory)

		// bucket processing may have created a root node if missing.  if so, we need to apply its props.
		fs.PlaceFile(arena.Path(), bucket.Root().Metadata, nil)

		// hash whole tree
		actualTreeHash := fshash.Hash(bucket, hasherFactory)

		// verify total integrity
		expectedTreeHash, err := base64.URLEncoding.DecodeString(string(dataHash))
		if err != nil {
			panic(integrity.ConfigError.New("Could not parse hash: %s", err))
		}
		if bytes.Equal(actualTreeHash, expectedTreeHash) {
			// excellent, got what we asked for.
			arena.hash = dataHash
		} else {
			// this may or may not be grounds for panic, depending on configuration.
			if config.AcceptHashMismatch {
				// if we're tolerating mismatches, report the actual hash through different mechanisms.
				// you probably only ever want to use this in tests or debugging; in prod it's just asking for insanity.
				arena.hash = integrity.CommitID(actualTreeHash)
			} else {
				panic(integrity.NewHashMismatchError(string(dataHash), base64.URLEncoding.EncodeToString(actualTreeHash)))
			}
		}
	}).Catch(integrity.Error, func(err *errors.Error) {
		panic(err)
	}).CatchAll(func(err error) {
		panic(integrity.UnknownError.Wrap(err))
	}).Done()
	return arena
}
// Execute a formula in a specified directory. MAY PANIC.
func (e *Executor) Execute(f def.Formula, j def.Job, d string, result *def.JobResult, stdin io.Reader, outS, errS io.WriteCloser, journal log15.Logger) {
	// Prepare filesystem
	rootfs := filepath.Join(d, "rootfs")
	transmat := util.DefaultTransmat()
	assembly := util.ProvisionInputs(
		transmat,
		util.BestAssembler(),
		f.Inputs, rootfs, journal,
	)
	defer assembly.Teardown() // Whatever happens: disassemble filesystem
	util.ProvisionOutputs(f.Outputs, rootfs, journal)

	// chroot's are pretty easy.
	cmdName := f.Accents.Entrypoint[0]
	cmd := exec.Command(cmdName, f.Accents.Entrypoint[1:]...)
	cmd.SysProcAttr = &syscall.SysProcAttr{
		Chroot:    rootfs,
		Pdeathsig: syscall.SIGKILL,
	}

	// except handling cwd is a little odd.
	// see comments in gosh tests with chroot for information about the odd behavior we're hacking around here;
	// we're comfortable making this special check here, but not upstreaming it to gosh,
	//  because in our context we "know" we're not racing anyone.
	if externalCwdStat, err := os.Stat(filepath.Join(rootfs, f.Accents.Cwd)); err != nil {
		panic(executor.TaskExecError.New("cannot set cwd to %q: %s", f.Accents.Cwd, err.(*os.PathError).Err))
	} else if !externalCwdStat.IsDir() {
		panic(executor.TaskExecError.New("cannot set cwd to %q: not a dir", f.Accents.Cwd))
	}
	cmd.Dir = f.Accents.Cwd

	// set env.
	// initialization already required by earlier 'validate' calls.
	cmd.Env = envToSlice(f.Accents.Env)

	cmd.Stdin = stdin
	cmd.Stdout = outS
	cmd.Stderr = errS

	// launch execution.
	// transform gosh's typed errors to repeatr's hierarchical errors.
	var proc gosh.Proc
	try.Do(func() {
		proc = gosh.ExecProcCmd(cmd)
	}).CatchAll(func(err error) {
		switch err.(type) {
		case gosh.NoSuchCommandError:
			panic(executor.NoSuchCommandError.Wrap(err))
		case gosh.NoArgumentsError:
			panic(executor.NoSuchCommandError.Wrap(err))
		case gosh.NoSuchCwdError:
			// included for clarity and completeness, but we'll never actually see this;
			//  see comments in gosh about the interaction of chroot and cwd error handling.
			panic(executor.TaskExecError.Wrap(err))
		case gosh.ProcMonitorError:
			panic(executor.TaskExecError.Wrap(err))
		default:
			panic(executor.UnknownError.Wrap(err))
		}
	}).Done()

	// Wait for the job to complete
	// REVIEW: consider exposing `gosh.Proc`'s interface as part of repeatr's job tracking api?
	result.ExitCode = proc.GetExitCode()

	// Save outputs
	result.Outputs = util.PreserveOutputs(transmat, f.Outputs, rootfs, journal)
}
func main() {
	try.Do(func() {
		cli.Main(os.Args, os.Stderr, os.Stdout)
	}).Catch(cli.Exit, func(err *errors.Error) {
		// Errors marked as valid user-facing issues get a nice
		// pretty-printed route out, and may include specified exit codes.
		fmt.Fprintf(os.Stderr, "%s\n", err.Message())
		// exit, taking the specified code if any.
		code := errors.GetData(err, cli.ExitCodeKey)
		if code == nil {
			os.Exit(0)
		}
		os.Exit(int(code.(cli.ExitCode)))
	}).Catch(cli.Error, func(err *errors.Error) {
		// Errors marked as valid user-facing issues get a nice
		// pretty-printed route out, and may include specified exit codes.
		if isDebugMode() {
			// in debug-mode, repanic all the way to death so that we get all of golang's built in log features.
			panic(err)
		} else {
			// print nicely.
			fmt.Fprintf(os.Stderr,
				"Repeatr was unable to complete your request!\n"+
					"%s\n", err.Message())
			// exit, taking the specified code if any.
			code := errors.GetData(err, cli.ExitCodeKey)
			if code == nil {
				os.Exit(int(cli.EXIT_USER))
			}
			os.Exit(int(code.(cli.ExitCode)))
		}
	}).CatchAll(func(err error) {
		// Errors that aren't marked as valid user-facing issues should be
		// logged in preparation for a bug report.
		if isDebugMode() {
			// in debug-mode, repanic all the way to death so that we get all of golang's built in log features.
			panic(err)
		} else {
			// save the error to a file.  we want to keep the stacks, but not scare away the user.
			logPath, saveErr := saveErrorReport(err)
			var saveMsg string
			if saveErr == nil {
				saveMsg = fmt.Sprintf("We've logged the full error to a file: %q.  Please include this in the report.", logPath)
			} else {
				saveMsg = fmt.Sprintf("Additionally, we were unable to save a full log of the problem (\"%s\").", saveErr)
			}
			fmt.Fprintf(os.Stderr,
				"Repeatr encountered a serious issue and was unable to complete your request!\n"+
					"Please file an issue to help us fix it.\n"+
					"%s\n"+
					"\n"+
					"This is the short version of the problem:\n"+
					"%s\n",
				saveMsg, errors.GetMessage(err),
			)
			os.Exit(int(cli.EXIT_UNKNOWNPANIC))
		}
	}).Done()
}