Example 1
/*
	Walks a filesystem.

	This is much like the standard library's `path/filepath.Walk`, except
	it's based on `treewalk`, which means it supports both pre- and post-order
	traversals.

	All paths begin with `./`, and directory names are slash-suffixed.
	E.g. you'll see a series like `{"./", "./a/", "./a/b"}`, etc.
	This matches the behavior described by `Normalize` in `lib/fshash`.

	Symlinks are not followed.

	The traversal order of siblings is *not* guaranteed, and is *not* necessarily
	stable.

	Caveat: calling `node.NextChild()` during your walk results in undefined behavior.
*/
func Walk(basePath string, preVisit WalkFunc, postVisit WalkFunc) error {
	return treewalk.Walk(
		newFileWalkNode(basePath, "./"),
		func(node treewalk.Node) error {
			filenode := node.(*FilewalkNode)
			if preVisit != nil {
				if err := preVisit(filenode); err != nil {
					return err
				}
			}
			return filenode.prepareChildren(basePath)
		},
		func(node treewalk.Node) error {
			filenode := node.(*FilewalkNode)
			var err error
			if postVisit != nil {
				err = postVisit(filenode)
			}
			filenode.forgetChildren()
			return err
		},
	)
}
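
A minimal usage sketch: assuming `WalkFunc` is `func(*FilewalkNode) error`, as implied by the type assertions above. The `Path` field is an illustrative assumption, not confirmed by this listing.

// Hypothetical usage of Walk: log each node pre-order on the way down and
// post-order on the way up.  (imports: fmt)
// Assumes WalkFunc = func(*FilewalkNode) error; the `Path` field is an
// assumption for illustration only.
func walkExample() error {
	return Walk("/tmp/scratch",
		func(node *FilewalkNode) error {
			fmt.Println("entering:", node.Path) // pre-order: before the node's children are visited
			return nil
		},
		func(node *FilewalkNode) error {
			fmt.Println("leaving:", node.Path) // post-order: after all children have been visited
			return nil
		},
	)
}

A non-nil error returned from either visitor aborts the walk and surfaces from `Walk` itself.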
Example 2
/*
	Walks the tree of files and metadata arrayed in a `Bucket` and
	constructs a tree hash over them.  The root of the tree hash is returned.
	The returned root hash can be said to verify the integrity of the
	entire tree (much like a Merkle tree).

	The serial structure is expressed something like the following:

		{"node": $dir.metadata.hash,
		 "leaves": [
			{"node": $file1.metadata.hash, "content": $file1.contentHash},
			{"node": $subdir.metadata.hash,
			 "leaves": [ ... ]},
		 ]
		}

	This expression is made in CBOR (RFC 7049) format with indefinite-length
	arrays and a fixed order for all map fields.  (In the actual encoding the
	map keys are abbreviated: "m" for the metadata hash, "l" for leaves, and
	"h" for the content hash.)  Every structure starting with "node" is itself
	hashed and that value substituted in before hashing the parent.  Since the
	metadata hash contains the file/dir name, and the tree itself is traversed
	in sorted order, the entire structure is computed deterministically and
	unambiguously.
*/
func Hash(bucket Bucket, hasherFactory func() hash.Hash) []byte {
	// Hack around codec not exporting things very usefully -.-
	const magic_RAW = 0
	const magic_UTF8 = 1
	// Keep a count of how many nodes visited.  Cheap sanity check.
	var visitCount int
	// At every point in the visitation, children need to submit their hashes back up the tree.
	// Prime the pump with a special reaction for when the root returns; every directory preVisit attaches hoppers for children thereon.
	upsubs := make(upsubStack, 0)
	var finalAnswer []byte
	upsubs.Push(func(x []byte) {
		finalAnswer = x
	})
	// Also keep a stack of hashers in use because they jump across the pre/post visit gap.
	hashers := make(hasherStack, 0)
	// Visitor definitions
	preVisit := func(node treewalk.Node) error {
		record := node.(RecordIterator).Record()
		visitCount++
		hasher := hasherFactory()
		_, enc := codec.GenHelperEncoder(codec.NewEncoder(hasher, new(codec.CborHandle)))
		enc.EncodeMapStart(2) // either way it's header + one of leaves or contenthash
		enc.EncodeString(magic_UTF8, "m")
		record.Metadata.Marshal(hasher)
		//fmt.Printf(":::: %q ->\n\t%#v\n\t%s\n", record.Metadata.Name, record.Metadata, base64.URLEncoding.EncodeToString(hasher.Sum(nil))) // non-cascading
		if record.Metadata.Typeflag == tar.TypeDir {
			// open the "leaves" array
			// this may end up being an empty dir, but we act the same regardless
			// (and we don't have that information here since the iterator has tunnel vision)
			enc.EncodeString(magic_UTF8, "l")
			hasher.Write([]byte{codec.CborStreamArray})
			upsubs.Push(func(x []byte) {
				enc.EncodeStringBytes(magic_RAW, x)
			})
			hashers.Push(hasher)
		} else {
			// heap the object's content hash in
			enc.EncodeString(magic_UTF8, "h")
			enc.EncodeStringBytes(magic_RAW, record.ContentHash)
			// finalize our hash here and upsub to save us the work of hanging onto the hasher until the postvisit call
			upsubs.Peek()(hasher.Sum(nil))
		}
		return nil
	}
	postVisit := func(node treewalk.Node) error {
		record := node.(RecordIterator).Record()
		if record.Metadata.Typeflag == tar.TypeDir {
			hasher := hashers.Pop()
			// close off the "leaves" array
			// No map-close necessary because we used a fixed length map.
			hasher.Write([]byte{0xff}) // should be `codec.CborStreamBreak` but upstream has an export bug :/
			hash := hasher.Sum(nil)
			// debug
			//	if len(strings.Split(record.Metadata.Name, "/")) == 3 {
			//		fmt.Printf("::: hashing -- %q \t=> %s\n", record.Metadata.Name, base64.URLEncoding.EncodeToString(hash))
			//	}
			// pop out this dir's hoppers for children data
			upsubs.Pop()
			// hash and upsub
			upsubs.Peek()(hash)
		}
		return nil
	}
	// Traverse
	if err := treewalk.Walk(bucket.Iterator(), preVisit, postVisit); err != nil {
		panic(err) // none of our code has known believable error returns.
	}
	// Sanity check no node left behind
	_ = upsubs.Pop()
	if !upsubs.Empty() || !hashers.Empty() {
		panic(errors.ProgrammerError.New("invariant failed after bucket records walk: stacks not empty"))
	}
	if visitCount != bucket.Length() {
		panic(errors.ProgrammerError.New("invariant failed after bucket records walk: visited %d of %d nodes", visitCount, bucket.Length()))
	}
	// return the result upsubbed by the root
	return finalAnswer
}
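
The `upsubStack` and `hasherStack` helpers aren't defined in this listing. A minimal sketch consistent with their call sites above (Push/Pop/Peek/Empty) would be slice-backed stacks like the following; the definitions in the original package may differ.

// Sketch of the stack helpers, inferred from their call sites in Hash.
// (imports: hash)
type upsubStack []func([]byte)

func (s *upsubStack) Push(f func([]byte)) { *s = append(*s, f) }
func (s *upsubStack) Pop() func([]byte) {
	f := (*s)[len(*s)-1]
	*s = (*s)[:len(*s)-1]
	return f
}
func (s upsubStack) Peek() func([]byte) { return s[len(s)-1] }
func (s upsubStack) Empty() bool        { return len(s) == 0 }

type hasherStack []hash.Hash

func (s *hasherStack) Push(h hash.Hash) { *s = append(*s, h) }
func (s *hasherStack) Pop() hash.Hash {
	h := (*s)[len(*s)-1]
	*s = (*s)[:len(*s)-1]
	return h
}
func (s hasherStack) Empty() bool { return len(s) == 0 }

Slices work here because each directory pushes exactly one hopper and one hasher in preVisit and pops them in postVisit, so the stacks mirror the depth of the traversal.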
Example 3
/*
	Unpacks a tar stream onto the filesystem at the given base path,
	recording each entry in the bucket as it goes (with a content hash
	for regular files) so the tree can be hashed afterwards.
*/
func Extract(tr *tar.Reader, destBasePath string, bucket fshash.Bucket, hasherFactory func() hash.Hash) {
	for {
		thdr, err := tr.Next()
		if err == io.EOF {
			break // end of archive
		}
		if err != nil {
			panic(integrity.WarehouseConnectionError.New("corrupt tar: %s", err))
		}
		hdr := fs.Metadata(*thdr)
		// filter/sanitize values:
		// - names must be clean, relative, dot-slash prefixed, and dirs slash-suffixed
		// - times should never be Go's zero value; replace those with epoch
		// Note that names at this point should be handled by `path` (not `filepath`); these are the canonical form fed to hashing
		hdr.Name = path.Clean(hdr.Name)
		if strings.HasPrefix(hdr.Name, "../") {
			panic(integrity.WarehouseConnectionError.New("corrupt tar: paths that use '../' to leave the base dir are invalid"))
		}
		if hdr.Name != "." {
			hdr.Name = "./" + hdr.Name
		}
		if hdr.ModTime.IsZero() {
			hdr.ModTime = def.Epochwhen
		}
		if hdr.AccessTime.IsZero() {
			hdr.AccessTime = def.Epochwhen
		}
		// conjure parents, if necessary.  tar format allows implicit parent dirs.
		// Note that if any of the implicitly conjured dirs is specified later, unpacking won't notice,
		// but bucket hashing iteration will (correctly) blow up for repeat entries.
		// It may well be possible to construct a tar like that, but it's well established that
		// tars with repeated filenames are asking for trouble; we reject them without ceremony.
		parts := strings.Split(hdr.Name, "/")
		for i := 1; i < len(parts); i++ {
			_, err := os.Lstat(filepath.Join(append([]string{destBasePath}, parts[:i]...)...))
			// if it already exists, move along; if the error is anything interesting, let PlaceFile decide how to deal with it
			if err == nil || !os.IsNotExist(err) {
				continue
			}
			// if we're missing a dir, conjure a node with defaulted values (same as we do for "./")
			conjuredHdr := fshash.DefaultDirRecord().Metadata
			conjuredHdr.Name = strings.Join(parts[:i], "/") + "/" // path.Join does cleaning; unwanted.
			fs.PlaceFile(destBasePath, conjuredHdr, nil)
			bucket.Record(conjuredHdr, nil)
		}
		// place the file
		switch hdr.Typeflag {
		case tar.TypeReg:
			reader := &flak.HashingReader{tr, hasherFactory()}
			fs.PlaceFile(destBasePath, hdr, reader)
			bucket.Record(hdr, reader.Hasher.Sum(nil))
		case tar.TypeDir:
			hdr.Name += "/"
			fallthrough
		default:
			fs.PlaceFile(destBasePath, hdr, nil)
			bucket.Record(hdr, nil)
		}
	}
	// cleanup dir times with a post-order traversal over the bucket
	if err := treewalk.Walk(bucket.Iterator(), nil, func(node treewalk.Node) error {
		record := node.(fshash.RecordIterator).Record()
		if record.Metadata.Typeflag == tar.TypeDir {
			fs.PlaceDirTime(destBasePath, record.Metadata)
		}
		return nil
	}); err != nil {
		panic(err)
	}
}
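
Putting the pieces together, a hedged end-to-end sketch. It assumes the `Hash` of Example 2 lives in the `fshash` package alongside `Bucket`; the `fshash.NewBucket` constructor and the SHA-384 hasher choice are illustrative assumptions, not confirmed by this listing.

// Hypothetical composition of Extract and Hash: unpack a tar stream onto
// disk, then compute the tree hash over the bucket populated during the
// extraction.  (imports: archive/tar, crypto/sha512, io)
// `fshash.NewBucket` is an assumed constructor; the real package may differ.
func unpackAndHash(stream io.Reader, destBasePath string) []byte {
	bucket := fshash.NewBucket()
	Extract(tar.NewReader(stream), destBasePath, bucket, sha512.New384)
	// Hash walks the bucket and returns the root of the tree hash,
	// which verifies the integrity of everything just unpacked.
	return fshash.Hash(bucket, sha512.New384)
}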