Ejemplo n.º 1
0
func saveWalk(srcBasePath string, tw *tar.Writer, filterset filter.FilterSet, bucket fshash.Bucket, hasherFactory func() hash.Hash) error {
	preVisit := func(filenode *fs.FilewalkNode) error {
		if filenode.Err != nil {
			return filenode.Err
		}
		hdr, file := fs.ScanFile(srcBasePath, filenode.Path, filenode.Info)
		// apply filters.  on scans, this is pretty easy, all of em just apply to the stream in memory.
		hdr = filterset.Apply(hdr)
		// flaten time to seconds.  this tar writer impl doesn't do subsecond precision.
		// the writer will flatten it internally of course, but we need to do it here as well
		// so that the hash and the serial form are describing the same thing.
		hdr.ModTime = hdr.ModTime.Truncate(time.Second)
		wat := tar.Header(hdr) // this line is... we're not gonna talk about this.
		tw.WriteHeader(&wat)
		if file == nil {
			bucket.Record(hdr, nil)
		} else {
			defer file.Close()
			hasher := hasherFactory()
			tee := io.MultiWriter(tw, hasher)
			_, err := io.Copy(tee, file)
			if err != nil {
				return err
			}
			bucket.Record(hdr, hasher.Sum(nil))
		}
		return nil
	}
	return fs.Walk(srcBasePath, preVisit, nil)
}
Ejemplo n.º 2
0
func Extract(tr *tar.Reader, destBasePath string, bucket fshash.Bucket, hasherFactory func() hash.Hash) {
	for {
		thdr, err := tr.Next()
		if err == io.EOF {
			break // end of archive
		}
		if err != nil {
			panic(integrity.WarehouseConnectionError.New("corrupt tar: %s", err))
		}
		hdr := fs.Metadata(*thdr)
		// filter/sanify values:
		// - names must be clean, relative dot-slash prefixed, and dirs slash-suffixed
		// - times should never be go's zero value; replace those with epoch
		// Note that names at this point should be handled by `path` (not `filepath`; these are canonical form for feed to hashing)
		hdr.Name = path.Clean(hdr.Name)
		if strings.HasPrefix(hdr.Name, "../") {
			panic(integrity.WarehouseConnectionError.New("corrupt tar: paths that use '../' to leave the base dir are invalid"))
		}
		if hdr.Name != "." {
			hdr.Name = "./" + hdr.Name
		}
		if hdr.ModTime.IsZero() {
			hdr.ModTime = def.Epochwhen
		}
		if hdr.AccessTime.IsZero() {
			hdr.AccessTime = def.Epochwhen
		}
		// conjure parents, if necessary.  tar format allows implicit parent dirs.
		// Note that if any of the implicitly conjured dirs is specified later, unpacking won't notice,
		// but bucket hashing iteration will (correctly) blow up for repeat entries.
		// It may well be possible to construct a tar like that, but it's already well established that
		// tars with repeated filenames are just asking for trouble and shall be rejected without
		// ceremony because they're just a ridiculous idea.
		parts := strings.Split(hdr.Name, "/")
		for i := range parts[:len(parts)-1] {
			i++
			_, err := os.Lstat(filepath.Join(append([]string{destBasePath}, parts[:i]...)...))
			// if it already exists, move along; if the error is anything interesting, let PlaceFile decide how to deal with it
			if err == nil || !os.IsNotExist(err) {
				continue
			}
			// if we're missing a dir, conjure a node with defaulted values (same as we do for "./")
			conjuredHdr := fshash.DefaultDirRecord().Metadata
			conjuredHdr.Name = strings.Join(parts[:i], "/") + "/" // path.Join does cleaning; unwanted.
			fs.PlaceFile(destBasePath, conjuredHdr, nil)
			bucket.Record(conjuredHdr, nil)
		}
		// place the file
		switch hdr.Typeflag {
		case tar.TypeReg:
			reader := &flak.HashingReader{tr, hasherFactory()}
			fs.PlaceFile(destBasePath, hdr, reader)
			bucket.Record(hdr, reader.Hasher.Sum(nil))
		case tar.TypeDir:
			hdr.Name += "/"
			fallthrough
		default:
			fs.PlaceFile(destBasePath, hdr, nil)
			bucket.Record(hdr, nil)
		}
	}
	// cleanup dir times with a post-order traversal over the bucket
	if err := treewalk.Walk(bucket.Iterator(), nil, func(node treewalk.Node) error {
		record := node.(fshash.RecordIterator).Record()
		if record.Metadata.Typeflag == tar.TypeDir {
			fs.PlaceDirTime(destBasePath, record.Metadata)
		}
		return nil
	}); err != nil {
		panic(err)
	}
}