Ejemplo n.º 1
0
// NewInputTarStream wraps the Reader stream of a tar archive and provides a
// Reader stream of the same.
//
// In the middle it will pack the segments and file metadata to storage.Packer
// `p`.
//
// The storage.FilePutter is where the payload of files in the stream is
// stashed. If this stashing is not needed, you can provide a nil
// storage.FilePutter. Since the checksumming is still needed, a default
// of NewDiscardFilePutter will be used internally.
//
// The returned error is currently always nil. Failures encountered while
// packing the stream are delivered through the returned Reader: its Read
// reports the first error that occurred.
func NewInputTarStream(r io.Reader, p storage.Packer, fp storage.FilePutter) (io.Reader, error) {
	// Callers want their own access to the tar archive stream, while the same
	// bytes must also pass through our forked 'archive/tar'. We tee the input
	// into a pipe and hand back the pipe reader: TeeReader does not buffer and
	// only reads what its consumer reads. Since tar archives have padding on
	// the end, we want to be the one reading the padding, even if the
	// caller's `archive/tar` doesn't care.
	pR, pW := io.Pipe()
	outputRdr := io.TeeReader(r, pW)

	// we need a putter that will generate the checksums of file payloads,
	// even when the caller does not want the payloads stored anywhere.
	if fp == nil {
		fp = storage.NewDiscardFilePutter()
	}

	go func() {
		// Close the pipe exactly once with the first error encountered.
		// CloseWithError(nil) behaves like Close, so the reader sees io.EOF
		// on success.
		pW.CloseWithError(packTarStream(outputRdr, p, fp))
	}()

	return pR, nil
}

// packTarStream reads the whole tar archive from rdr, adding the raw segments
// and file metadata to p and handing file payloads to fp. It stops at, and
// returns, the first error so that nothing further is packed after a failure.
func packTarStream(rdr io.Reader, p storage.Packer, fp storage.FilePutter) error {
	tr := tar.NewReader(rdr)
	tr.RawAccounting = true
	for {
		hdr, err := tr.Next()
		if err != nil {
			if err != io.EOF {
				return err
			}
			// even when an EOF is reached, there is often 1024 null bytes on
			// the end of an archive. Collect them too.
			if _, err := p.AddEntry(storage.Entry{
				Type:    storage.SegmentType,
				Payload: tr.RawBytes(),
			}); err != nil {
				return err
			}
			break // not return. We need the end of the reader.
		}
		if hdr == nil {
			break // not return. We need the end of the reader.
		}

		// The raw bytes accounted for so far are the header of this entry.
		if _, err := p.AddEntry(storage.Entry{
			Type:    storage.SegmentType,
			Payload: tr.RawBytes(),
		}); err != nil {
			return err
		}

		var csum []byte
		if hdr.Size > 0 {
			if _, csum, err = fp.Put(hdr.Name, tr); err != nil {
				return err
			}
		}

		// File entries added, regardless of size
		if _, err := p.AddEntry(storage.Entry{
			Type:    storage.FileType,
			Name:    hdr.Name,
			Size:    hdr.Size,
			Payload: csum,
		}); err != nil {
			return err
		}

		// Any raw bytes accounted for after the payload was consumed.
		if b := tr.RawBytes(); len(b) > 0 {
			if _, err := p.AddEntry(storage.Entry{
				Type:    storage.SegmentType,
				Payload: b,
			}); err != nil {
				return err
			}
		}
	}

	// it is allowable, and not uncommon that there is further padding on the
	// end of an archive, apart from the expected 1024 null bytes.
	remainder, err := ioutil.ReadAll(rdr)
	if err != nil {
		return err
	}
	_, err = p.AddEntry(storage.Entry{
		Type:    storage.SegmentType,
		Payload: remainder,
	})
	return err
}
Ejemplo n.º 2
0
// main reads each tar archive named on the command line through the
// raw-accounting tar reader and writes every byte it accounts for (raw
// header segments, file payloads, end-of-archive padding and any remainder)
// to "<archive>.out". It then prints whether the number of bytes accounted
// for matches the size of the archive on disk.
func main() {
	flag.Parse()
	log.SetOutput(os.Stderr)
	for _, arg := range flag.Args() {
		func() {
			// Open the tar archive
			fh, err := os.Open(arg)
			if err != nil {
				log.Fatalf("%s: %v", arg, err)
			}
			defer fh.Close()

			output, err := os.Create(fmt.Sprintf("%s.out", arg))
			if err != nil {
				log.Fatal(err)
			}
			defer func() {
				// A failed close can mean buffered data never reached disk.
				if err := output.Close(); err != nil {
					log.Println(err)
				}
			}()
			log.Printf("writing %q to %q", fh.Name(), output.Name())

			fi, err := fh.Stat()
			if err != nil {
				log.Fatalf("%s: %v", fh.Name(), err)
			}
			size := fi.Size()

			// sum counts every byte written to output.
			var sum int64
			// write copies b to output and accounts for it; a failed write
			// makes the size comparison meaningless, so it is fatal.
			write := func(b []byte) {
				if _, err := output.Write(b); err != nil {
					log.Fatalf("%s: %v", output.Name(), err)
				}
				sum += int64(len(b))
			}

			tr := tar.NewReader(fh)
			tr.RawAccounting = true
			for {
				hdr, err := tr.Next()
				if err != nil {
					if err != io.EOF {
						log.Println(err)
					}
					// even when an EOF is reached, there is often 1024 null bytes on
					// the end of an archive. Collect them too.
					post := tr.RawBytes()
					write(post)

					fmt.Printf("EOF padding: %d\n", len(post))
					break
				}

				// Raw bytes accounted for up to and including this header.
				pre := tr.RawBytes()
				write(pre)

				n, err := io.Copy(output, tr)
				// Count what was copied even on failure; those bytes are
				// already in the output file.
				sum += n
				if err != nil {
					log.Println(err)
					break
				}

				fmt.Println(hdr.Name, "pre:", len(pre), "read:", n)
			}

			// it is allowable, and not uncommon that there is further padding on the
			// end of an archive, apart from the expected 1024 null bytes
			remainder, err := ioutil.ReadAll(fh)
			if err != nil {
				log.Fatalf("%s: %v", fh.Name(), err)
			}
			write(remainder)
			fmt.Printf("Remainder: %d\n", len(remainder))

			if size != sum {
				fmt.Printf("Size: %d; Sum: %d; Diff: %d\n", size, sum, size-sum)
				fmt.Printf("Compare like `cmp -bl %s %s | less`\n", fh.Name(), output.Name())
			} else {
				fmt.Printf("Size: %d; Sum: %d\n", size, sum)
			}
		}()
	}
}