func main() {
	flag.Parse()
	for _, arg := range flag.Args() {
		fh, err := os.Open(arg)
		if err != nil {
			log.Fatal(err)
		}
		defer fh.Close()
		fi, err := fh.Stat()
		if err != nil {
			log.Fatal(err)
		}
		fmt.Printf("inspecting %q (size %dk)\n", fh.Name(), fi.Size()/1024)

		// temporary file to collect the disassembled metadata
		packFh, err := ioutil.TempFile("", "packed.")
		if err != nil {
			log.Fatal(err)
		}
		defer packFh.Close()
		if *flCleanup {
			defer os.Remove(packFh.Name())
		}

		// tee the archive through the disassembler, packing entry metadata
		// as JSON and discarding the file payloads
		sp := storage.NewJSONPacker(packFh)
		fp := storage.NewDiscardFilePutter()
		dissam, err := asm.NewInputTarStream(fh, sp, fp)
		if err != nil {
			log.Fatal(err)
		}

		// read the teed stream to completion, counting the files seen
		var num int
		tr := tar.NewReader(dissam)
		for {
			_, err = tr.Next()
			if err != nil {
				if err == io.EOF {
					break
				}
				log.Fatal(err)
			}
			num++
			if _, err := io.Copy(ioutil.Discard, tr); err != nil {
				log.Fatal(err)
			}
		}
		fmt.Printf(" -- number of files: %d\n", num)

		if err := packFh.Sync(); err != nil {
			log.Fatal(err)
		}
		fi, err = packFh.Stat()
		if err != nil {
			log.Fatal(err)
		}
		fmt.Printf(" -- size of metadata uncompressed: %dk\n", fi.Size()/1024)

		// gzip the metadata to estimate its compressed (on-the-wire) size
		gzPackFh, err := ioutil.TempFile("", "packed.gz.")
		if err != nil {
			log.Fatal(err)
		}
		defer gzPackFh.Close()
		if *flCleanup {
			defer os.Remove(gzPackFh.Name())
		}

		gzWrtr := gzip.NewWriter(gzPackFh)
		if _, err := packFh.Seek(0, 0); err != nil {
			log.Fatal(err)
		}
		if _, err := io.Copy(gzWrtr, packFh); err != nil {
			log.Fatal(err)
		}
		if err := gzWrtr.Close(); err != nil {
			log.Fatal(err)
		}
		if err := gzPackFh.Sync(); err != nil {
			log.Fatal(err)
		}
		fi, err = gzPackFh.Stat()
		if err != nil {
			log.Fatal(err)
		}
		fmt.Printf(" -- size of gzip compressed metadata: %dk\n", fi.Size()/1024)
	}
}
// NewInputTarStream wraps the Reader stream of a tar archive and provides a
// Reader stream of the same.
//
// In the middle it will pack the segments and file metadata to storage.Packer
// `p`.
//
// The storage.FilePutter is where the payloads of files in the stream are
// stashed. If this stashing is not needed, you can provide a nil
// storage.FilePutter. Since the checksumming is still needed, a default of
// NewDiscardFilePutter will be used internally.
func NewInputTarStream(r io.Reader, p storage.Packer, fp storage.FilePutter) (io.Reader, error) {
	// What to do here... folks will want their own access to the Reader that is
	// their tar archive stream, but we'll need that same stream to use our
	// forked 'archive/tar'.
	// Perhaps do an io.TeeReader that hands back an io.Reader for them to read
	// from, and we'll MITM the stream to store metadata.
	// We'll need a storage.FilePutter too ...

	// Another concern: whether to do any storage.FilePutter operations without
	// extracting any amount of the archive. But then again, we're not making
	// files/directories, hardlinks, etc. Just writing the io to the
	// storage.FilePutter. Perhaps we have a DiscardFilePutter that is a bit
	// bucket.

	// we'll return the pipe reader, since TeeReader does not buffer and will
	// only read what the outputRdr Read's. Since tar archives have padding on
	// the end, we want to be the one reading the padding, even if the user's
	// `archive/tar` doesn't care.
	pR, pW := io.Pipe()
	outputRdr := io.TeeReader(r, pW)

	// we need a putter that will generate the crc64 sums of file payloads
	if fp == nil {
		fp = storage.NewDiscardFilePutter()
	}

	go func() {
		tr := tar.NewReader(outputRdr)
		tr.RawAccounting = true
		for {
			hdr, err := tr.Next()
			if err != nil {
				if err != io.EOF {
					pW.CloseWithError(err)
					return
				}
				// even when an EOF is reached, there are often 1024 null bytes
				// on the end of an archive. Collect them too.
				_, err := p.AddEntry(storage.Entry{
					Type:    storage.SegmentType,
					Payload: tr.RawBytes(),
				})
				if err != nil {
					pW.CloseWithError(err)
					return
				}
				break // not return. We need the end of the reader.
			}
			if hdr == nil {
				break // not return. We need the end of the reader.
			}

			if _, err := p.AddEntry(storage.Entry{
				Type:    storage.SegmentType,
				Payload: tr.RawBytes(),
			}); err != nil {
				pW.CloseWithError(err)
				return
			}

			var csum []byte
			if hdr.Size > 0 {
				var err error
				_, csum, err = fp.Put(hdr.Name, tr)
				if err != nil {
					pW.CloseWithError(err)
					return
				}
			}

			// File entries added, regardless of size
			_, err = p.AddEntry(storage.Entry{
				Type:    storage.FileType,
				Name:    hdr.Name,
				Size:    hdr.Size,
				Payload: csum,
			})
			if err != nil {
				pW.CloseWithError(err)
				return
			}

			if b := tr.RawBytes(); len(b) > 0 {
				_, err = p.AddEntry(storage.Entry{
					Type:    storage.SegmentType,
					Payload: b,
				})
				if err != nil {
					pW.CloseWithError(err)
					return
				}
			}
		}

		// it is allowable, and not uncommon, that there is further padding on
		// the end of an archive, apart from the expected 1024 null bytes.
		remainder, err := ioutil.ReadAll(outputRdr)
		if err != nil && err != io.EOF {
			pW.CloseWithError(err)
			return
		}
		_, err = p.AddEntry(storage.Entry{
			Type:    storage.SegmentType,
			Payload: remainder,
		})
		if err != nil {
			pW.CloseWithError(err)
		} else {
			pW.Close()
		}
	}()

	return pR, nil
}
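// A minimal usage sketch of NewInputTarStream (not part of the original
// source). The helper name and parameters here are illustrative assumptions.
// It shows the simplest consumer: pack the metadata as JSON and drain the
// returned stream. Passing a nil storage.FilePutter is allowed per the doc
// comment above; a DiscardFilePutter is substituted internally.
func exampleDisassemble(tarPath, metaPath string) error {
	fh, err := os.Open(tarPath) // hypothetical input archive
	if err != nil {
		return err
	}
	defer fh.Close()

	metaFh, err := os.Create(metaPath) // hypothetical metadata destination
	if err != nil {
		return err
	}
	defer metaFh.Close()

	// pack the segment and file entries as JSON into metaFh; the nil
	// FilePutter means payloads are checksummed and then discarded
	rdr, err := NewInputTarStream(fh, storage.NewJSONPacker(metaFh), nil)
	if err != nil {
		return err
	}

	// the returned Reader must be drained to the end, or the goroutine
	// feeding the pipe never sees the whole archive (including padding)
	if _, err := io.Copy(ioutil.Discard, rdr); err != nil {
		return err
	}
	return metaFh.Sync()
}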
func CommandChecksize(c *cli.Context) {
	if len(c.Args()) == 0 {
		logrus.Fatalf("please specify tar archives to check ('-' will check stdin)")
	}
	for _, arg := range c.Args() {
		fh, err := os.Open(arg)
		if err != nil {
			log.Fatal(err)
		}
		defer fh.Close()
		fi, err := fh.Stat()
		if err != nil {
			log.Fatal(err)
		}
		fmt.Printf("inspecting %q (size %dk)\n", fh.Name(), fi.Size()/1024)

		// temporary file to collect the disassembled metadata; preserved
		// when the "work" flag is set
		packFh, err := ioutil.TempFile("", "packed.")
		if err != nil {
			log.Fatal(err)
		}
		defer packFh.Close()
		if !c.Bool("work") {
			defer os.Remove(packFh.Name())
		} else {
			fmt.Printf(" -- working file preserved: %s\n", packFh.Name())
		}

		// tee the archive through the disassembler, packing entry metadata
		// as JSON and discarding the file payloads
		sp := storage.NewJSONPacker(packFh)
		fp := storage.NewDiscardFilePutter()
		dissam, err := asm.NewInputTarStream(fh, sp, fp)
		if err != nil {
			log.Fatal(err)
		}

		// read the teed stream to completion, counting the files seen
		var num int
		tr := tar.NewReader(dissam)
		for {
			_, err = tr.Next()
			if err != nil {
				if err == io.EOF {
					break
				}
				log.Fatal(err)
			}
			num++
			if _, err := io.Copy(ioutil.Discard, tr); err != nil {
				log.Fatal(err)
			}
		}
		fmt.Printf(" -- number of files: %d\n", num)

		if err := packFh.Sync(); err != nil {
			log.Fatal(err)
		}
		fi, err = packFh.Stat()
		if err != nil {
			log.Fatal(err)
		}
		fmt.Printf(" -- size of metadata uncompressed: %dk\n", fi.Size()/1024)

		// gzip the metadata to estimate its compressed (on-the-wire) size
		gzPackFh, err := ioutil.TempFile("", "packed.gz.")
		if err != nil {
			log.Fatal(err)
		}
		defer gzPackFh.Close()
		if !c.Bool("work") {
			defer os.Remove(gzPackFh.Name())
		}

		gzWrtr := gzip.NewWriter(gzPackFh)
		if _, err := packFh.Seek(0, 0); err != nil {
			log.Fatal(err)
		}
		if _, err := io.Copy(gzWrtr, packFh); err != nil {
			log.Fatal(err)
		}
		if err := gzWrtr.Close(); err != nil {
			log.Fatal(err)
		}
		if err := gzPackFh.Sync(); err != nil {
			log.Fatal(err)
		}
		fi, err = gzPackFh.Stat()
		if err != nil {
			log.Fatal(err)
		}
		fmt.Printf(" -- size of gzip compressed metadata: %dk\n", fi.Size()/1024)
	}
}