Exemplo n.º 1
0
// Walks `basePath`, hashing it, pushing the encoded tar to `file`, and returning the final hash.
func Save(file io.Writer, basePath string, filterset filter.FilterSet, hasherFactory func() hash.Hash) string {
	// walk filesystem, copying and accumulating data for integrity check
	bucket := &fshash.MemoryBucket{}
	tarWriter := tar.NewWriter(file)
	defer tarWriter.Close()
	if err := saveWalk(basePath, tarWriter, filterset, bucket, hasherFactory); err != nil {
		panic(err) // TODO this is not well typed, and does not clearly indicate whether scanning or committing had the problem
	}

	// hash whole tree
	actualTreeHash := fshash.Hash(bucket, hasherFactory)

	// report
	return base64.URLEncoding.EncodeToString(actualTreeHash)
}
Exemplo n.º 2
0
/*
	Arenas produced by Dir Transmats may be relocated by simple `mv`.
*/
func (t *DirTransmat) Materialize(
	kind integrity.TransmatKind,
	dataHash integrity.CommitID,
	siloURIs []integrity.SiloURI,
	options ...integrity.MaterializerConfigurer,
) integrity.Arena {
	var arena dirArena
	try.Do(func() {
		// Basic validation and config
		config := integrity.EvaluateConfig(options...)
		if kind != Kind {
			panic(errors.ProgrammerError.New("This transmat supports definitions of type %q, not %q", Kind, kind))
		}

		// Ping silos
		if len(siloURIs) < 1 {
			panic(integrity.ConfigError.New("Materialization requires at least one data source!"))
			// Note that it's possible a caching layer will satisfy things even without data sources...
			//  but if that was going to happen, it already would have by now.
		}
		// Our policy is to take the first path that exists.
		//  This lets you specify a series of potential locations,
		var siloURI integrity.SiloURI
		for _, givenURI := range siloURIs {
			// TODO still assuming all local paths and not doing real uri parsing
			localPath := string(givenURI)
			_, err := os.Stat(localPath)
			if os.IsNotExist(err) {
				continue
			}
			siloURI = givenURI
			break
		}
		if siloURI == "" {
			panic(integrity.WarehouseConnectionError.New("No warehouses were available!"))
		}

		// Create staging arena to produce data into.
		var err error
		arena.path, err = ioutil.TempDir(t.workPath, "")
		if err != nil {
			panic(integrity.TransmatError.New("Unable to create arena: %s", err))
		}

		// walk filesystem, copying and accumulating data for integrity check
		hasherFactory := sha512.New384
		bucket := &fshash.MemoryBucket{}
		localPath := string(siloURI)
		if err := fshash.FillBucket(localPath, arena.Path(), bucket, filter.FilterSet{}, hasherFactory); err != nil {
			panic(err)
		}

		// hash whole tree
		actualTreeHash := fshash.Hash(bucket, hasherFactory)

		// verify total integrity
		expectedTreeHash, err := base64.URLEncoding.DecodeString(string(dataHash))
		if err != nil {
			panic(integrity.ConfigError.New("Could not parse hash: %s", err))
		}
		if bytes.Equal(actualTreeHash, expectedTreeHash) {
			// excellent, got what we asked for.
			arena.hash = dataHash
		} else {
			// this may or may not be grounds for panic, depending on configuration.
			if config.AcceptHashMismatch {
				// if we're tolerating mismatches, report the actual hash through different mechanisms.
				// you probably only ever want to use this in tests or debugging; in prod it's just asking for insanity.
				arena.hash = integrity.CommitID(actualTreeHash)
			} else {
				panic(integrity.NewHashMismatchError(string(dataHash), base64.URLEncoding.EncodeToString(actualTreeHash)))
			}
		}
	}).Catch(integrity.Error, func(err *errors.Error) {
		panic(err)
	}).CatchAll(func(err error) {
		panic(integrity.UnknownError.Wrap(err))
	}).Done()
	return arena
}
Exemplo n.º 3
0
/*
	Arenas produced by Dir Transmats may be relocated by simple `mv`.
*/
func (t *S3Transmat) Materialize(
	kind integrity.TransmatKind,
	dataHash integrity.CommitID,
	siloURIs []integrity.SiloURI,
	options ...integrity.MaterializerConfigurer,
) integrity.Arena {
	var arena dirArena
	try.Do(func() {
		// Basic validation and config
		config := integrity.EvaluateConfig(options...)
		if kind != Kind {
			panic(errors.ProgrammerError.New("This transmat supports definitions of type %q, not %q", Kind, kind))
		}

		// Parse URI; Find warehouses.
		if len(siloURIs) < 1 {
			panic(integrity.ConfigError.New("Materialization requires at least one data source!"))
			// Note that it's possible a caching layer will satisfy things even without data sources...
			//  but if that was going to happen, it already would have by now.
		}
		// Our policy is to take the first path that exists.
		//  This lets you specify a series of potential locations, and if one is unavailable we'll just take the next.
		var warehouseBucketName string
		var warehousePathPrefix string
		var warehouseCtntAddr bool
		for _, givenURI := range siloURIs {
			u, err := url.Parse(string(givenURI))
			if err != nil {
				panic(integrity.ConfigError.New("failed to parse URI: %s", err))
			}
			warehouseBucketName = u.Host
			warehousePathPrefix = u.Path
			switch u.Scheme {
			case "s3":
				warehouseCtntAddr = false
			case "s3+splay":
				warehouseCtntAddr = true
			default:
				panic(integrity.ConfigError.New("unrecognized scheme: %q", u.Scheme))
			}
			// TODO figure out how to check for data (or at least warehouse!) presence;
			//  currently just assuming the first one's golden, and blowing up later if it's not.
			break
		}
		if warehouseBucketName == "" {
			panic(integrity.WarehouseConnectionError.New("No warehouses were available!"))
		}

		// load keys from env
		// TODO someday URIs should grow smart enough to control this in a more general fashion -- but for now, host ENV is actually pretty feasible and plays easily with others.
		// TODO should not require keys!  we're just reading, after all; anon access is 100% valid.
		//   Buuuuut s3gof3r doesn't seem to understand empty keys; it still sends them as if to login, and AWS says 403.  So, foo.
		keys, err := s3gof3r.EnvKeys()
		if err != nil {
			panic(S3CredentialsMissingError.Wrap(err))
		}

		// initialize reader from s3!
		getPath := warehousePathPrefix
		if warehouseCtntAddr {
			getPath = path.Join(warehousePathPrefix, string(dataHash))
		}
		s3reader := makeS3reader(warehouseBucketName, getPath, keys)
		defer s3reader.Close()
		// prepare decompression as necessary
		reader, err := tartrans.Decompress(s3reader)
		if err != nil {
			panic(integrity.WarehouseConnectionError.New("could not start decompressing: %s", err))
		}
		tarReader := tar.NewReader(reader)

		// Create staging arena to produce data into.
		arena.path, err = ioutil.TempDir(t.workPath, "")
		if err != nil {
			panic(integrity.TransmatError.New("Unable to create arena: %s", err))
		}

		// walk input tar stream, placing data and accumulating hashes and metadata for integrity check
		bucket := &fshash.MemoryBucket{}
		tartrans.Extract(tarReader, arena.Path(), bucket, hasherFactory)

		// bucket processing may have created a root node if missing.  if so, we need to apply its props.
		fs.PlaceFile(arena.Path(), bucket.Root().Metadata, nil)

		// hash whole tree
		actualTreeHash := fshash.Hash(bucket, hasherFactory)

		// verify total integrity
		expectedTreeHash, err := base64.URLEncoding.DecodeString(string(dataHash))
		if err != nil {
			panic(integrity.ConfigError.New("Could not parse hash: %s", err))
		}
		if bytes.Equal(actualTreeHash, expectedTreeHash) {
			// excellent, got what we asked for.
			arena.hash = dataHash
		} else {
			// this may or may not be grounds for panic, depending on configuration.
			if config.AcceptHashMismatch {
				// if we're tolerating mismatches, report the actual hash through different mechanisms.
				// you probably only ever want to use this in tests or debugging; in prod it's just asking for insanity.
				arena.hash = integrity.CommitID(actualTreeHash)
			} else {
				panic(integrity.NewHashMismatchError(string(dataHash), base64.URLEncoding.EncodeToString(actualTreeHash)))
			}
		}
	}).Catch(integrity.Error, func(err *errors.Error) {
		panic(err)
	}).CatchAll(func(err error) {
		panic(integrity.UnknownError.Wrap(err))
	}).Done()
	return arena
}
Exemplo n.º 4
0
func (t DirTransmat) Scan(
	kind integrity.TransmatKind,
	subjectPath string,
	siloURIs []integrity.SiloURI,
	options ...integrity.MaterializerConfigurer,
) integrity.CommitID {
	var commitID integrity.CommitID
	try.Do(func() {
		// Basic validation and config
		config := integrity.EvaluateConfig(options...)
		if kind != Kind {
			panic(errors.ProgrammerError.New("This transmat supports definitions of type %q, not %q", Kind, kind))
		}

		// If scan area doesn't exist, bail immediately.
		// No need to even start dialing warehouses if we've got nothing for em.
		_, err := os.Stat(subjectPath)
		if err != nil {
			if os.IsNotExist(err) {
				return // empty commitID
			} else {
				panic(err)
			}
		}

		// Parse save locations.
		// This transmat only supports one output location at a time due
		//  to Old code we haven't invested in refactoring yet.
		var localPath string
		if len(siloURIs) == 0 {
			localPath = "" // empty string is a well known value to `fshash.FillBucket`: means just hash, don't copy.
		} else if len(siloURIs) == 1 {
			// TODO still assuming all local paths and not doing real uri parsing
			localPath = string(siloURIs[0])
			err := os.MkdirAll(filepath.Dir(localPath), 0755)
			if err != nil {
				panic(integrity.WarehouseConnectionError.New("Unable to write file: %s", err))
			}
		} else {
			panic(integrity.ConfigError.New("%s transmat only supports shipping to 1 warehouse", Kind))
		}

		// walk filesystem, copying and accumulating data for integrity check
		bucket := &fshash.MemoryBucket{}
		err = fshash.FillBucket(subjectPath, localPath, bucket, config.FilterSet, hasherFactory)
		if err != nil {
			panic(err) // TODO this is not well typed, and does not clearly indicate whether scanning or committing had the problem
		}

		// hash whole tree
		actualTreeHash := fshash.Hash(bucket, hasherFactory)

		// report
		commitID = integrity.CommitID(base64.URLEncoding.EncodeToString(actualTreeHash))
	}).Catch(integrity.Error, func(err *errors.Error) {
		panic(err)
	}).CatchAll(func(err error) {
		panic(integrity.UnknownError.Wrap(err))
	}).Done()
	return commitID
}
Exemplo n.º 5
0
/*
	Arenas produced by Tar Transmats may be relocated by simple `mv`.
*/
func (t *TarTransmat) Materialize(
	kind integrity.TransmatKind,
	dataHash integrity.CommitID,
	siloURIs []integrity.SiloURI,
	options ...integrity.MaterializerConfigurer,
) integrity.Arena {
	var arena tarArena
	try.Do(func() {
		// Basic validation and config
		config := integrity.EvaluateConfig(options...)
		if kind != Kind {
			panic(errors.ProgrammerError.New("This transmat supports definitions of type %q, not %q", Kind, kind))
		}

		// Ping silos
		if len(siloURIs) < 1 {
			panic(integrity.ConfigError.New("Materialization requires at least one data source!"))
			// Note that it's possible a caching layer will satisfy things even without data sources...
			//  but if that was going to happen, it already would have by now.
		}
		// Our policy is to take the first path that exists.
		//  This lets you specify a series of potential locations, and if one is unavailable we'll just take the next.
		var stream io.Reader
		for _, uri := range siloURIs {
			try.Do(func() {
				stream = makeReader(dataHash, uri)
			}).Catch(integrity.DataDNE, func(err *errors.Error) {
				// fine, we'll just try the next one
				// TODO LOGGING
			}).Catch(integrity.WarehouseConnectionError, func(err *errors.Error) {
				// ... this does kind of seem to indicate we should have "warehouse offline or DNE" be separate from "tcp flaked after we shook on it yo"
				// for now we consider both fatal.  revist this when we get smarter logging, etc
				panic(err)
			}).Done()
			if stream != nil {
				break
			}
		}
		if stream == nil {
			panic(integrity.WarehouseConnectionError.New("No warehouses were available!"))
		}

		// Wrap input stream with decompression as necessary
		reader, err := Decompress(stream)
		if err != nil {
			panic(integrity.WarehouseConnectionError.New("could not start decompressing: %s", err))
		}
		tarReader := tar.NewReader(reader)

		// Create staging arena to produce data into.
		arena.path, err = ioutil.TempDir(t.workPath, "")
		if err != nil {
			panic(integrity.TransmatError.New("Unable to create arena: %s", err))
		}

		// walk input tar stream, placing data and accumulating hashes and metadata for integrity check
		bucket := &fshash.MemoryBucket{}
		Extract(tarReader, arena.Path(), bucket, hasherFactory)

		// bucket processing may have created a root node if missing.  if so, we need to apply its props.
		fs.PlaceFile(arena.Path(), bucket.Root().Metadata, nil)

		// hash whole tree
		actualTreeHash := fshash.Hash(bucket, hasherFactory)

		// verify total integrity
		expectedTreeHash, err := base64.URLEncoding.DecodeString(string(dataHash))
		if err != nil {
			panic(integrity.ConfigError.New("Could not parse hash: %s", err))
		}
		if bytes.Equal(actualTreeHash, expectedTreeHash) {
			// excellent, got what we asked for.
			arena.hash = dataHash
		} else {
			// this may or may not be grounds for panic, depending on configuration.
			if config.AcceptHashMismatch {
				// if we're tolerating mismatches, report the actual hash through different mechanisms.
				// you probably only ever want to use this in tests or debugging; in prod it's just asking for insanity.
				arena.hash = integrity.CommitID(actualTreeHash)
			} else {
				panic(integrity.NewHashMismatchError(string(dataHash), base64.URLEncoding.EncodeToString(actualTreeHash)))
			}
		}
	}).Catch(integrity.Error, func(err *errors.Error) {
		panic(err)
	}).CatchAll(func(err error) {
		panic(integrity.UnknownError.Wrap(err))
	}).Done()
	return arena
}