func saveWalk(srcBasePath string, tw *tar.Writer, filterset filter.FilterSet, bucket fshash.Bucket, hasherFactory func() hash.Hash) error {
	preVisit := func(filenode *fs.FilewalkNode) error {
		if filenode.Err != nil {
			return filenode.Err
		}
		hdr, file := fs.ScanFile(srcBasePath, filenode.Path, filenode.Info)
		// apply filters.  on scans, this is pretty easy: they all apply to the stream in memory.
		hdr = filterset.Apply(hdr)
		// flatten time to seconds.  this tar writer impl doesn't do subsecond precision.
		// the writer will flatten it internally of course, but we need to do it here as well
		// so that the hash and the serial form are describing the same thing.
		hdr.ModTime = hdr.ModTime.Truncate(time.Second)
		// convert our metadata back to a plain tar header for serialization.
		wat := tar.Header(hdr)
		if err := tw.WriteHeader(&wat); err != nil {
			return err
		}
		if file == nil {
			bucket.Record(hdr, nil)
		} else {
			defer file.Close()
			// tee the file content to the tar stream and the hasher in one pass,
			// so the recorded digest describes exactly the bytes serialized.
			hasher := hasherFactory()
			tee := io.MultiWriter(tw, hasher)
			if _, err := io.Copy(tee, file); err != nil {
				return err
			}
			bucket.Record(hdr, hasher.Sum(nil))
		}
		return nil
	}
	return fs.Walk(srcBasePath, preVisit, nil)
}
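// The core trick in saveWalk is the io.MultiWriter tee: each file's bytes are
// copied once, landing in both the archive and the hasher simultaneously.
// Below is a minimal standalone sketch of that pattern using only the standard
// library; writeHashedEntry and the surrounding main are hypothetical names for
// illustration, not part of this codebase.

package main

import (
	"archive/tar"
	"crypto/sha256"
	"fmt"
	"io"
	"os"
	"strings"
)

// writeHashedEntry writes one regular-file entry to the tar stream while
// computing its digest in the same pass, so hash and serial form agree.
func writeHashedEntry(tw *tar.Writer, name string, content io.Reader, size int64) ([]byte, error) {
	hdr := &tar.Header{Name: name, Mode: 0644, Size: size}
	if err := tw.WriteHeader(hdr); err != nil {
		return nil, err
	}
	hasher := sha256.New()
	tee := io.MultiWriter(tw, hasher) // every byte goes to both sinks
	if _, err := io.Copy(tee, content); err != nil {
		return nil, err
	}
	return hasher.Sum(nil), nil
}

func main() {
	tw := tar.NewWriter(os.Stdout)
	defer tw.Close()
	body := "hello, world\n"
	sum, err := writeHashedEntry(tw, "./hello.txt", strings.NewReader(body), int64(len(body)))
	if err != nil {
		panic(err)
	}
	fmt.Fprintf(os.Stderr, "sha256: %x\n", sum)
}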
func Extract(tr *tar.Reader, destBasePath string, bucket fshash.Bucket, hasherFactory func() hash.Hash) {
	for {
		thdr, err := tr.Next()
		if err == io.EOF {
			break // end of archive
		}
		if err != nil {
			panic(integrity.WarehouseConnectionError.New("corrupt tar: %s", err))
		}
		hdr := fs.Metadata(*thdr)
		// filter/sanitize values:
		// - names must be clean, relative, dot-slash prefixed, and dirs slash-suffixed
		// - times should never be go's zero value; replace those with epoch
		// Note that names at this point should be handled by `path` (not `filepath`;
		// these are the canonical form we feed to hashing).
		hdr.Name = path.Clean(hdr.Name)
		if strings.HasPrefix(hdr.Name, "../") {
			panic(integrity.WarehouseConnectionError.New("corrupt tar: paths that use '../' to leave the base dir are invalid"))
		}
		if hdr.Name != "." {
			hdr.Name = "./" + hdr.Name
		}
		if hdr.ModTime.IsZero() {
			hdr.ModTime = def.Epochwhen
		}
		if hdr.AccessTime.IsZero() {
			hdr.AccessTime = def.Epochwhen
		}
		// conjure parents, if necessary.  tar format allows implicit parent dirs.
		// Note that if any of the implicitly conjured dirs is specified later, unpacking won't notice,
		// but bucket hashing iteration will (correctly) blow up for repeat entries.
		// It may well be possible to construct a tar like that, but it's already well established that
		// tars with repeated filenames are just asking for trouble and shall be rejected without
		// ceremony, because they're a ridiculous idea.
		parts := strings.Split(hdr.Name, "/")
		for i := 1; i < len(parts); i++ {
			_, err := os.Lstat(filepath.Join(append([]string{destBasePath}, parts[:i]...)...))
			// if it already exists, move along; if the error is anything interesting,
			// let the later PlaceFile call decide how to deal with it.
			if err == nil || !os.IsNotExist(err) {
				continue
			}
			// if we're missing a dir, conjure a node with defaulted values (same as we do for "./")
			conjuredHdr := fshash.DefaultDirRecord().Metadata
			conjuredHdr.Name = strings.Join(parts[:i], "/") + "/" // path.Join does cleaning; unwanted.
			fs.PlaceFile(destBasePath, conjuredHdr, nil)
			bucket.Record(conjuredHdr, nil)
		}
		// place the file
		switch hdr.Typeflag {
		case tar.TypeReg:
			reader := &flak.HashingReader{tr, hasherFactory()}
			fs.PlaceFile(destBasePath, hdr, reader)
			bucket.Record(hdr, reader.Hasher.Sum(nil))
		case tar.TypeDir:
			hdr.Name += "/"
			fallthrough
		default:
			fs.PlaceFile(destBasePath, hdr, nil)
			bucket.Record(hdr, nil)
		}
	}
	// cleanup dir times with a post-order traversal over the bucket
	if err := treewalk.Walk(bucket.Iterator(), nil, func(node treewalk.Node) error {
		record := node.(fshash.RecordIterator).Record()
		if record.Metadata.Typeflag == tar.TypeDir {
			fs.PlaceDirTime(destBasePath, record.Metadata)
		}
		return nil
	}); err != nil {
		panic(err)
	}
}
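// Extract's counterpart trick is hashing each entry's bytes as they are read
// out of the archive, so the digest describes exactly what was unpacked.  The
// codebase wraps this in flak.HashingReader; a minimal standalone sketch of
// the same pattern can be built from io.TeeReader in the standard library.
// Everything below is illustrative, not code from this repo.

package main

import (
	"archive/tar"
	"crypto/sha256"
	"fmt"
	"io"
	"os"
)

func main() {
	tr := tar.NewReader(os.Stdin)
	for {
		hdr, err := tr.Next()
		if err == io.EOF {
			break // end of archive
		}
		if err != nil {
			panic(err)
		}
		if hdr.Typeflag != tar.TypeReg {
			continue // only regular files carry content to hash
		}
		hasher := sha256.New()
		// TeeReader mirrors every byte read from tr into the hasher, so
		// consuming the entry and hashing it happen in a single pass.
		if _, err := io.Copy(io.Discard, io.TeeReader(tr, hasher)); err != nil {
			panic(err)
		}
		fmt.Printf("%x  %s\n", hasher.Sum(nil), hdr.Name)
	}
}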