Beispiel #1
0
func main() {
	flag.Parse()

	for _, arg := range flag.Args() {
		fh, err := os.Open(arg)
		if err != nil {
			log.Fatal(err)
		}
		defer fh.Close()
		fi, err := fh.Stat()
		if err != nil {
			log.Fatal(err)
		}
		fmt.Printf("inspecting %q (size %dk)\n", fh.Name(), fi.Size()/1024)

		packFh, err := ioutil.TempFile("", "packed.")
		if err != nil {
			log.Fatal(err)
		}
		defer packFh.Close()
		if *flCleanup {
			defer os.Remove(packFh.Name())
		}

		sp := storage.NewJSONPacker(packFh)
		fp := storage.NewDiscardFilePutter()
		dissam, err := asm.NewInputTarStream(fh, sp, fp)
		if err != nil {
			log.Fatal(err)
		}

		var num int
		tr := tar.NewReader(dissam)
		for {
			_, err = tr.Next()
			if err != nil {
				if err == io.EOF {
					break
				}
				log.Fatal(err)
			}
			num++
			if _, err := io.Copy(ioutil.Discard, tr); err != nil {
				log.Fatal(err)
			}
		}
		fmt.Printf(" -- number of files: %d\n", num)

		if err := packFh.Sync(); err != nil {
			log.Fatal(err)
		}

		fi, err = packFh.Stat()
		if err != nil {
			log.Fatal(err)
		}
		fmt.Printf(" -- size of metadata uncompressed: %dk\n", fi.Size()/1024)

		gzPackFh, err := ioutil.TempFile("", "packed.gz.")
		if err != nil {
			log.Fatal(err)
		}
		defer gzPackFh.Close()
		if *flCleanup {
			defer os.Remove(gzPackFh.Name())
		}

		gzWrtr := gzip.NewWriter(gzPackFh)

		if _, err := packFh.Seek(0, 0); err != nil {
			log.Fatal(err)
		}

		if _, err := io.Copy(gzWrtr, packFh); err != nil {
			log.Fatal(err)
		}
		gzWrtr.Close()

		if err := gzPackFh.Sync(); err != nil {
			log.Fatal(err)
		}

		fi, err = gzPackFh.Stat()
		if err != nil {
			log.Fatal(err)
		}
		fmt.Printf(" -- size of gzip compressed metadata: %dk\n", fi.Size()/1024)
	}
}
Beispiel #2
0
// NewInputTarStream wraps the Reader stream of a tar archive and provides a
// Reader stream of the same.
//
// In the middle it will pack the segments and file metadata to storage.Packer
// `p`.
//
// The the storage.FilePutter is where payload of files in the stream are
// stashed. If this stashing is not needed, you can provide a nil
// storage.FilePutter. Since the checksumming is still needed, then a default
// of NewDiscardFilePutter will be used internally
func NewInputTarStream(r io.Reader, p storage.Packer, fp storage.FilePutter) (io.Reader, error) {
	// What to do here... folks will want their own access to the Reader that is
	// their tar archive stream, but we'll need that same stream to use our
	// forked 'archive/tar'.
	// Perhaps do an io.TeeReader that hands back an io.Reader for them to read
	// from, and we'll MITM the stream to store metadata.
	// We'll need a storage.FilePutter too ...

	// Another concern, whether to do any storage.FilePutter operations, such that we
	// don't extract any amount of the archive. But then again, we're not making
	// files/directories, hardlinks, etc. Just writing the io to the storage.FilePutter.
	// Perhaps we have a DiscardFilePutter that is a bit bucket.

	// we'll return the pipe reader, since TeeReader does not buffer and will
	// only read what the outputRdr Read's. Since Tar archives have padding on
	// the end, we want to be the one reading the padding, even if the user's
	// `archive/tar` doesn't care.
	pR, pW := io.Pipe()
	outputRdr := io.TeeReader(r, pW)

	// we need a putter that will generate the crc64 sums of file payloads
	if fp == nil {
		fp = storage.NewDiscardFilePutter()
	}

	go func() {
		tr := tar.NewReader(outputRdr)
		tr.RawAccounting = true
		for {
			hdr, err := tr.Next()
			if err != nil {
				if err != io.EOF {
					pW.CloseWithError(err)
					return
				}
				// even when an EOF is reached, there is often 1024 null bytes on
				// the end of an archive. Collect them too.
				_, err := p.AddEntry(storage.Entry{
					Type:    storage.SegmentType,
					Payload: tr.RawBytes(),
				})
				if err != nil {
					pW.CloseWithError(err)
				}
				break // not return. We need the end of the reader.
			}
			if hdr == nil {
				break // not return. We need the end of the reader.
			}

			if _, err := p.AddEntry(storage.Entry{
				Type:    storage.SegmentType,
				Payload: tr.RawBytes(),
			}); err != nil {
				pW.CloseWithError(err)
			}

			var csum []byte
			if hdr.Size > 0 {
				var err error
				_, csum, err = fp.Put(hdr.Name, tr)
				if err != nil {
					pW.CloseWithError(err)
				}
			}

			// File entries added, regardless of size
			_, err = p.AddEntry(storage.Entry{
				Type:    storage.FileType,
				Name:    hdr.Name,
				Size:    hdr.Size,
				Payload: csum,
			})
			if err != nil {
				pW.CloseWithError(err)
			}

			if b := tr.RawBytes(); len(b) > 0 {
				_, err = p.AddEntry(storage.Entry{
					Type:    storage.SegmentType,
					Payload: b,
				})
				if err != nil {
					pW.CloseWithError(err)
				}
			}
		}

		// it is allowable, and not uncommon that there is further padding on the
		// end of an archive, apart from the expected 1024 null bytes.
		remainder, err := ioutil.ReadAll(outputRdr)
		if err != nil && err != io.EOF {
			pW.CloseWithError(err)
		}
		_, err = p.AddEntry(storage.Entry{
			Type:    storage.SegmentType,
			Payload: remainder,
		})
		if err != nil {
			pW.CloseWithError(err)
		} else {
			pW.Close()
		}
	}()

	return pR, nil
}
Beispiel #3
0
func CommandChecksize(c *cli.Context) {
	if len(c.Args()) == 0 {
		logrus.Fatalf("please specify tar archives to check ('-' will check stdin)")
	}
	for _, arg := range c.Args() {
		fh, err := os.Open(arg)
		if err != nil {
			log.Fatal(err)
		}
		defer fh.Close()
		fi, err := fh.Stat()
		if err != nil {
			log.Fatal(err)
		}
		fmt.Printf("inspecting %q (size %dk)\n", fh.Name(), fi.Size()/1024)

		packFh, err := ioutil.TempFile("", "packed.")
		if err != nil {
			log.Fatal(err)
		}
		defer packFh.Close()
		if !c.Bool("work") {
			defer os.Remove(packFh.Name())
		} else {
			fmt.Printf(" -- working file preserved: %s\n", packFh.Name())
		}

		sp := storage.NewJSONPacker(packFh)
		fp := storage.NewDiscardFilePutter()
		dissam, err := asm.NewInputTarStream(fh, sp, fp)
		if err != nil {
			log.Fatal(err)
		}

		var num int
		tr := tar.NewReader(dissam)
		for {
			_, err = tr.Next()
			if err != nil {
				if err == io.EOF {
					break
				}
				log.Fatal(err)
			}
			num++
			if _, err := io.Copy(ioutil.Discard, tr); err != nil {
				log.Fatal(err)
			}
		}
		fmt.Printf(" -- number of files: %d\n", num)

		if err := packFh.Sync(); err != nil {
			log.Fatal(err)
		}

		fi, err = packFh.Stat()
		if err != nil {
			log.Fatal(err)
		}
		fmt.Printf(" -- size of metadata uncompressed: %dk\n", fi.Size()/1024)

		gzPackFh, err := ioutil.TempFile("", "packed.gz.")
		if err != nil {
			log.Fatal(err)
		}
		defer gzPackFh.Close()
		if !c.Bool("work") {
			defer os.Remove(gzPackFh.Name())
		}

		gzWrtr := gzip.NewWriter(gzPackFh)

		if _, err := packFh.Seek(0, 0); err != nil {
			log.Fatal(err)
		}

		if _, err := io.Copy(gzWrtr, packFh); err != nil {
			log.Fatal(err)
		}
		gzWrtr.Close()

		if err := gzPackFh.Sync(); err != nil {
			log.Fatal(err)
		}

		fi, err = gzPackFh.Stat()
		if err != nil {
			log.Fatal(err)
		}
		fmt.Printf(" -- size of gzip compressed metadata: %dk\n", fi.Size()/1024)
	}
}