func corpusFromTar(tarReader *tar.Reader) chan TrainingSample { corpus := make(chan TrainingSample) go func() { for { header, err := tarReader.Next() if err == io.EOF { break } if err == nil && header.Typeflag == tar.TypeReg { if strings.HasSuffix(header.Name, ".spam.txt") { text := readerToString(tarReader) fmt.Println("Training on", header.Name) addDocumentToCorpus(text, SPAM, corpus) } else if strings.HasSuffix(header.Name, ".ham.txt") { text := readerToString(tarReader) fmt.Println("Training on", header.Name) addDocumentToCorpus(text, HAM, corpus) } } else { continue } } close(corpus) }() return corpus }
// TarFileList returns the names of all entries in the tar archive stored at
// filename, in archive order. Files whose name ends in ".gz" or ".tgz" are
// decompressed with gzip before being read as tar.
//
// If the file cannot be opened (or its gzip header is invalid) the returned
// slice is nil. If reading fails part-way through the archive, the names
// collected so far are returned together with the error.
func TarFileList(filename string) ([]string, error) {
	file, err := os.Open(filename)
	if err != nil {
		return nil, err
	}
	defer file.Close()

	var src io.Reader = file
	if strings.HasSuffix(filename, ".gz") || strings.HasSuffix(filename, ".tgz") {
		gzipReader, err := gzip.NewReader(file)
		if err != nil {
			return nil, err
		}
		// The gzip reader is a closer in its own right; release it too.
		defer gzipReader.Close()
		src = gzipReader
	}
	tarReader := tar.NewReader(src)

	var files []string
	for {
		header, err := tarReader.Next()
		if err == io.EOF {
			return files, nil
		}
		if err != nil {
			return files, err
		}
		files = append(files, header.Name)
	}
}
func extractTar(archive *tar.Reader, path string) error { for { hdr, err := archive.Next() if err == io.EOF { return nil } if err != nil { return err } path := filepath.Join(path, hdr.Name) switch { case hdr.FileInfo().IsDir(): if err := os.MkdirAll(path, hdr.FileInfo().Mode()); err != nil { return err } case hdr.Linkname != "": if err := os.Symlink(hdr.Linkname, path); err != nil { // just warn for now fmt.Fprintln(os.Stderr, err) // return err } default: if err := extractFile(archive, hdr, path); err != nil { return err } } } }
// ExtractTar extracts the contents of tr to the given dir. It returns an
// error, if any.
//
// Directory entries are created (with parents) using mode 0755. Every other
// entry is written as a file, created or truncated, with the mode recorded
// in its header. Each file is closed as soon as it has been written, so the
// number of open descriptors does not grow with the size of the archive, and
// a failure to flush a file on close is reported.
func ExtractTar(dir string, tr *tar.Reader) error {
	for {
		hdr, err := tr.Next()
		if err == io.EOF {
			return nil
		}
		if err != nil {
			return err
		}

		target := filepath.Join(dir, hdr.Name)
		if hdr.Typeflag == tar.TypeDir {
			if err := os.MkdirAll(target, 0755); err != nil {
				return err
			}
			continue
		}

		file, err := os.OpenFile(target,
			os.O_WRONLY|os.O_CREATE|os.O_TRUNC,
			os.FileMode(hdr.Mode),
		)
		if err != nil {
			return err
		}
		_, copyErr := io.Copy(file, tr)
		// Close right away rather than deferring: a defer inside this loop
		// would keep every file open until the whole archive is done.
		closeErr := file.Close()
		if copyErr != nil {
			return copyErr
		}
		if closeErr != nil {
			return closeErr
		}
	}
}
// copyTar copies entries from src to dest until src is exhausted.
//
// When f is non-nil it is called with each header, and entries for which it
// returns false are skipped. Kept entries have their header and body written
// to dest unchanged. The first read or write error stops the copy and is
// returned. dest is not closed.
func copyTar(dest *tar.Writer, src *tar.Reader, f func(*tar.Header) bool) error {
	for {
		hdr, err := src.Next()
		switch {
		case err == io.EOF:
			return nil
		case err != nil:
			return err
		}

		keep := f == nil || f(hdr)
		if !keep {
			continue
		}

		if err = dest.WriteHeader(hdr); err != nil {
			return err
		}
		if _, err = io.Copy(dest, src); err != nil {
			return err
		}
	}
}
// readFileFromBackup copies the next file from the archive into the shard. // The file is skipped if it does not have a matching shardRelativePath prefix. func (e *Engine) readFileFromBackup(tr *tar.Reader, shardRelativePath string) error { // Read next archive file. hdr, err := tr.Next() if err != nil { return err } // Skip file if it does not have a matching prefix. if !filepath.HasPrefix(hdr.Name, shardRelativePath) { return nil } path, err := filepath.Rel(shardRelativePath, hdr.Name) if err != nil { return err } // Create new file on disk. f, err := os.OpenFile(filepath.Join(e.path, path), os.O_CREATE|os.O_RDWR, 0666) if err != nil { return err } defer f.Close() // Copy from archive to the file. if _, err := io.CopyN(f, tr, hdr.Size); err != nil { return err } // Sync to disk & close. if err := f.Sync(); err != nil { return err } return f.Close() }
// Validate that the source tar stream conforms to expected specs func (d DatasetSplitter) validate(source *tar.Reader) (bool, error) { // validation rules: // 1. has at least 2 files // 2. the depth of each file is 2 (folder/filename.xxx) numFiles := 0 for { hdr, err := source.Next() if err == io.EOF { // end of tar archive break } if err != nil { return false, err } numFiles += 1 pathComponents := strings.Split(hdr.Name, "/") if len(pathComponents) != 2 { return false, fmt.Errorf("Path does not have 2 components: %v", hdr.Name) } } if numFiles < 2 { return false, fmt.Errorf("Archive must contain at least 2 files") } return true, nil }
func TarServer(path string) (*tarServer, error) { var buffer []byte var reader *bytes.Reader var handle *tar.Reader var cache map[string]ReadWriteContainer var header *tar.Header var err error fmt.Printf("Opening tar server: %s\n", path) if buffer, err = ioutil.ReadFile(path); err != nil { return nil, err } reader = bytes.NewReader(buffer) if handle = tar.NewReader(reader); err != nil { return nil, err } modTime := time.Now().Add(-2 * time.Second) cache = make(map[string]ReadWriteContainer) cache["/"] = TarFile("/", true, modTime, make([]byte, 0)) for { if header, err = handle.Next(); err != nil { if err == io.EOF { break // End of archive } return nil, err } parentFile := cache["/"] parts := strings.Split(header.Name, "/") partPath := "/" for i, part := range parts { if i == len(parts)-1 { // File b := new(bytes.Buffer) if _, err = io.Copy(b, handle); err != nil { return nil, err } partPath += part file := TarFile(partPath, false, modTime, b.Bytes()) parentFile.AddChild(file) cache[partPath] = file break } // Dir partPath += part + "/" if tempFile, ok := cache[partPath]; ok { parentFile = tempFile continue } else { // Didn't find the dir in the cache // Make the dir, add it, cache it and set it to parent dir := TarFile(partPath, true, modTime, make([]byte, 0)) parentFile.AddChild(dir) cache[partPath] = dir parentFile = dir } } } return &tarServer{ cache: cache, }, nil }
func parsePackageDefinition(reader *tar.Reader, pkg *Package, details *Details) error { cin := make(chan int) cout := make(chan *Token) cerr := make(chan error) cdone := make(chan bool) go parseSimpleSexp(cin, cout, cdone) go readPackageDefinition(cout, cerr, pkg, details) bytes := make([]byte, 256) for { n, err := reader.Read(bytes) if err == io.EOF { cdone <- true break } if err != nil { cdone <- true return err } for _, b := range bytes[:n] { select { case err = <-cerr: return err default: cin <- int(b) } } } return <-cerr }
func (t *TarInfo) Load(file io.ReadSeeker) { var reader *tar.Reader file.Seek(0, 0) gzipReader, err := gzip.NewReader(file) if err != nil { // likely not a gzip compressed file file.Seek(0, 0) reader = tar.NewReader(file) } else { reader = tar.NewReader(gzipReader) } for { header, err := reader.Next() if err == io.EOF { // end of tar file break } else if err != nil { // error occured logger.Debug("[TarInfoLoad] Error when reading tar stream tarsum. Disabling TarSum, TarFilesInfo. Error: %s", err.Error()) t.Error = TarError(err.Error()) return } t.TarSum.Append(header, reader) t.TarFilesInfo.Append(header) } }
// extractFileFromTar scans tr for the first entry whose cleaned name equals
// the cleaned form of file and returns that entry's contents as a byte slice.
//
// It returns an error if the archive ends without a match, if the matching
// entry is not a regular file, or if the archive cannot be read.
func extractFileFromTar(tr *tar.Reader, file string) ([]byte, error) {
	wanted := filepath.Clean(file)
	for {
		hdr, err := tr.Next()
		if err == io.EOF {
			return nil, fmt.Errorf("file not found")
		}
		if err != nil {
			return nil, fmt.Errorf("error extracting tarball: %v", err)
		}

		if filepath.Clean(hdr.Name) != wanted {
			continue
		}

		isRegular := hdr.Typeflag == tar.TypeReg || hdr.Typeflag == tar.TypeRegA
		if !isRegular {
			return nil, fmt.Errorf("requested file not a regular file")
		}

		contents, err := ioutil.ReadAll(tr)
		if err != nil {
			return nil, fmt.Errorf("error extracting tarball: %v", err)
		}
		return contents, nil
	}
}
func (lc *localContainer) extractFileSystem(fs *tar.Reader) error { hdr, err := fs.Next() for err != io.EOF { if err != nil { return err } file := filepath.Join(lc.Root, hdr.Name) switch hdr.Typeflag { case tar.TypeDir: err := os.MkdirAll(file, os.FileMode(0755)) if err != nil { return err } case tar.TypeReg: err := lc.writeFile(file, fs) if err != nil { return err } default: log.Printf("Encountered unknown file type 0x%02x, skipping", hdr.Typeflag) } hdr, err = fs.Next() } return nil }
func newDecompressionEntry(archive *tar.Reader) (*DecompressionEntry, error) { header, err := archive.Next() if err == io.EOF { return nil, nil } if err != nil { return nil, fmt.Errorf("Failed to read header: %s", err) } if header == nil { return nil, nil } dataReader := &closeableReader{ reader: io.LimitReader(archive, header.Size), } return &DecompressionEntry{ RawEntry: RawEntry{ RawHeader: header, RawData: dataReader, }, data: dataReader, }, nil }
func processTarStream(tr *tar.Reader, destination string) error { for { hdr, err := tr.Next() if err != nil { if err == io.EOF { return nil } return fmt.Errorf("Unable to extract container: %v\n", err) } hdrInfo := hdr.FileInfo() dstpath := path.Join(destination, strings.TrimPrefix(hdr.Name, DOCKER_TAR_PREFIX)) // Overriding permissions to allow writing content mode := hdrInfo.Mode() | OWNER_PERM_RW switch hdr.Typeflag { case tar.TypeDir: if err := os.Mkdir(dstpath, mode); err != nil { if !os.IsExist(err) { return fmt.Errorf("Unable to create directory: %v", err) } err = os.Chmod(dstpath, mode) if err != nil { return fmt.Errorf("Unable to update directory mode: %v", err) } } case tar.TypeReg, tar.TypeRegA: file, err := os.OpenFile(dstpath, os.O_CREATE|os.O_TRUNC|os.O_WRONLY, mode) if err != nil { return fmt.Errorf("Unable to create file: %v", err) } if _, err := io.Copy(file, tr); err != nil { file.Close() return fmt.Errorf("Unable to write into file: %v", err) } file.Close() case tar.TypeSymlink: if err := os.Symlink(hdr.Linkname, dstpath); err != nil { return fmt.Errorf("Unable to create symlink: %v\n", err) } case tar.TypeLink: target := path.Join(destination, strings.TrimPrefix(hdr.Linkname, DOCKER_TAR_PREFIX)) if err := os.Link(target, dstpath); err != nil { return fmt.Errorf("Unable to create link: %v\n", err) } default: // For now we're skipping anything else. Special device files and // symlinks are not needed or anyway probably incorrect. } // maintaining access and modification time in best effort fashion os.Chtimes(dstpath, hdr.AccessTime, hdr.ModTime) } }
// UnTarGz decompresses and unpacks the archive at srcFilePath into
// destDirPath. Archives whose name ends in ".bz2" are read as bzip2; every
// other archive is read as gzip. The caller is expected to have checked that
// srcFilePath exists.
//
// Only non-directory entries are materialised: the parent directories of
// each file are created on demand, the file is written, and the mode from
// the header is applied on a best-effort basis. Directory entries themselves
// are skipped.
//
// It returns the first error met while opening, decoding or writing. A
// corrupt archive yields an error rather than a crash, and every output file
// is closed as soon as it has been written.
func UnTarGz(srcFilePath string, destDirPath string) error {
	// Best effort: the directory may already exist, and a real problem will
	// surface below when the first file is created.
	_ = os.Mkdir(destDirPath, os.ModePerm)

	fr, err := os.Open(srcFilePath)
	if err != nil {
		return err
	}
	defer fr.Close()

	var tr *tar.Reader
	if strings.HasSuffix(srcFilePath, ".bz2") {
		tr = tar.NewReader(bzip2.NewReader(fr))
	} else {
		gr, err := gzip.NewReader(fr)
		if err != nil {
			return err
		}
		defer gr.Close()
		tr = tar.NewReader(gr)
	}

	for {
		hdr, err := tr.Next()
		if err == io.EOF {
			// End of tar archive.
			return nil
		}
		if err != nil {
			// hdr is nil here; bail out before touching it.
			return err
		}
		if hdr.Typeflag == tar.TypeDir {
			continue
		}

		// Create the parent directory before creating the file.
		if err := os.MkdirAll(destDirPath+"/"+path.Dir(hdr.Name), os.ModePerm); err != nil {
			return err
		}

		target := destDirPath + "/" + hdr.Name
		fw, err := os.Create(target)
		if err != nil {
			return err
		}
		// Applying the archived mode stays best effort.
		_ = os.Chmod(target, os.FileMode(hdr.Mode))

		_, copyErr := io.Copy(fw, tr)
		closeErr := fw.Close()
		if copyErr != nil {
			return copyErr
		}
		if closeErr != nil {
			return closeErr
		}
	}
}
// readNextEntry advances tarFile to its next entry and returns the entry's
// full contents. Any failure, including reaching the end of the archive,
// terminates the program via log.Fatalln.
func readNextEntry(tarFile *tar.Reader) []byte {
	if _, err := tarFile.Next(); err != nil {
		log.Fatalln(err)
	}

	contents, err := ioutil.ReadAll(tarFile)
	if err != nil {
		log.Fatalln(err)
	}
	return contents
}
// extract 'control' file from 'reader'. the contents of a 'control' file // is a set of key-value pairs as described in // https://www.debian.org/doc/debian-policy/ch-controlfields.html func extractControlFromIpk(reader io.Reader) (string, error) { var ( arReader *ar.Reader tarReader *tar.Reader gzReader *gzip.Reader ) arReader = ar.NewReader(reader) for { header, err := arReader.Next() if err != nil && err != io.EOF { return "", fmt.Errorf("extracting contents: %v", err) } else if header == nil { break } // NOTE: strangeley the name of the files end with a "/" ... content error? if header.Name == "control.tar.gz/" || header.Name == "control.tar.gz" { gzReader, err = gzip.NewReader(arReader) if err != nil { return "", fmt.Errorf("analyzing control.tar.gz: %v", err) } break } } if gzReader == nil { return "", fmt.Errorf("missing control.tar.gz entry") } defer gzReader.Close() buffer := bytes.NewBuffer(nil) tarReader = tar.NewReader(gzReader) for { header, err := tarReader.Next() if err != nil && err != io.EOF { return "", fmt.Errorf("extracting control.tar.gz: %v", err) } else if header == nil { break } if header.Name != "./control" { continue } io.Copy(buffer, tarReader) break } if buffer.Len() == 0 { return "", fmt.Errorf("missing or empty 'control' file inside 'control.tar.gz'") } return buffer.String(), nil }
func getNext(tr *tar.Reader) (*tar.Header, error) { for { hdr, err := tr.Next() if err == io.EOF { // we've reached end of archive return hdr, err } else if err != nil { return nil, errors.Wrapf(err, "reader: error reading archive") } return hdr, nil } }
// firstFile returns the contents of the next entry in tr, which is the
// first file when tr has not been read from yet. An error from advancing
// the reader (io.EOF for an empty archive) or from reading the entry is
// returned unchanged.
func firstFile(tr *tar.Reader) ([]byte, error) {
	_, err := tr.Next()
	if err != nil {
		return nil, err
	}

	contents := new(bytes.Buffer)
	_, err = io.Copy(contents, tr)
	if err != nil {
		return nil, err
	}
	return contents.Bytes(), nil
}
func readDebControlFile(reader io.Reader) (string, error) { archiveReader := ar.NewReader(reader) for { header, err := archiveReader.Next() if err == io.EOF { break } if err != nil { panic(err) } if strings.HasPrefix(header.Name, "control.tar") { var controlReader *tar.Reader if strings.HasSuffix(header.Name, "gz") { gzipStream, err := gzip.NewReader(archiveReader) if err != nil { panic(err) } controlReader = tar.NewReader(gzipStream) } else { return "", errors.New("Compression type not supported") } for { header, err := controlReader.Next() if err == io.EOF { break } if err != nil { panic(err) } if strings.HasSuffix(header.Name, "control") { var buffer bytes.Buffer _, err := io.Copy(bufio.NewWriter(&buffer), controlReader) if err != nil { panic(err) } return buffer.String(), nil } } } } return "", errors.New("Couldn't find control file in package") }
func prepairSquash(rs io.ReadSeeker, layers []string) (files map[string]fileInfo, json, version []byte, err error) { files = make(map[string]fileInfo) json = nil for _, layerId := range layers { var layer *tar.Reader var newJSON []byte newJSON, version, layer, err = readLayer(rs, layerId) if err != nil { err = fmt.Errorf("read layers: %v", err) return } if json != nil { json, err = merge(json, newJSON) if err != nil { err = fmt.Errorf("merge: %v", err) return } } else { json = newJSON } for { var h *tar.Header if h, err = layer.Next(); err == io.EOF { err = nil break } if err != nil { return } dir, file := path.Split(h.Name) if !strings.HasPrefix(file, ".wh.") { files[h.Name] = fileInfo{layerId, h.Size} continue } name := path.Join(dir, strings.TrimPrefix(file, ".wh.")) if _, ok := files[name]; !ok { name = name + "/" if _, ok := files[name]; !ok { err = fmt.Errorf("whiteout file '%s' found without existing file '%s'", h.Name, name) return } } delete(files, name) } } return }
// t prints the name of every entry in tr to standard output, one per line.
// It returns nil once the archive is exhausted, or the first read error.
func t(tr *tar.Reader) error {
	for {
		hdr, err := tr.Next()
		switch {
		case err == io.EOF:
			return nil
		case err != nil:
			return err
		}
		fmt.Printf("%s\n", hdr.Name)
	}
}
// nextFile advances tr to its next entry and checks that the entry is the
// regular file called name. On success it returns tr itself, positioned on
// the entry's contents.
//
// It returns io.ErrUnexpectedEOF when the archive has no further entries,
// the reader's own error when the archive cannot be read, and a descriptive
// error when the entry has a different name or is not a regular file.
func nextFile(tr *tar.Reader, name string) (io.Reader, error) {
	h, err := tr.Next()
	switch {
	case err == io.EOF:
		return nil, io.ErrUnexpectedEOF
	case err != nil:
		return nil, err
	case h.Name != name:
		return nil, fmt.Errorf("expected file %s got %s", name, h.Name)
	case h.Typeflag != tar.TypeReg:
		return nil, fmt.Errorf("expected %s to be a regular file", name)
	}
	return tr, nil
}
// Walk walks through the files in the tarball represented by tarstream and // passes each of them to the WalkFunc provided as an argument func Walk(tarReader tar.Reader, walkFunc func(t *TarFile) error) error { for { hdr, err := tarReader.Next() if err == io.EOF { // end of tar archive break } if err != nil { return fmt.Errorf("Error reading tar entry: %v", err) } if err := walkFunc(&TarFile{Header: hdr, TarStream: &tarReader}); err != nil { return err } } return nil }
// fsObject collects the Go source files found in r into a map from entry
// name to file contents. Entries whose name does not end in ".go" are
// skipped. Reading stops silently at the end of the archive or at the first
// error from the reader; whatever was gathered so far is returned.
func fsObject(r *tar.Reader) map[string]string {
	sources := make(map[string]string)
	for hdr, err := r.Next(); err == nil; hdr, err = r.Next() {
		if !strings.HasSuffix(hdr.Name, ".go") {
			continue
		}
		var contents bytes.Buffer
		io.Copy(&contents, r)
		sources[hdr.Name] = contents.String()
	}
	return sources
}
// init initializes a new ReadCloser. func (rc *ReadCloser) init(r *tar.Reader) error { defer rc.Close() rc.File = make([]*tar.Header, 0, 10) for { h, err := r.Next() if err == io.EOF { break } else if err != nil { return err } rc.File = append(rc.File, h) } return nil }
// Walk calls walkFunc with the header of every entry in tarReader, in
// archive order.
//
// Walking stops at the end of the archive (nil is returned), at the first
// read error (returned with an "Error reading tar entry" prefix), or at the
// first error returned by walkFunc (returned unchanged).
func Walk(tarReader *tar.Reader, walkFunc func(hdr *tar.Header) error) error {
	for {
		hdr, err := tarReader.Next()
		switch {
		case err == io.EOF:
			// End of tar archive.
			return nil
		case err != nil:
			return fmt.Errorf("Error reading tar entry: %v", err)
		}

		if walkErr := walkFunc(hdr); walkErr != nil {
			return walkErr
		}
	}
}
func parseSyncPackage(r *tar.Reader) (pkg *pkginfo.Package, err error) { var hdr *tar.Header if hdr, err = r.Next(); err != nil { return nil, err } if hdr.Typeflag != tar.TypeDir { return nil, errors.New(fmt.Sprintf("expected directory, found %s (%d)", hdr.Name, hdr.Typeflag)) } dirPath := hdr.Name descPath := path.Join(dirPath, "desc") depsPath := path.Join(dirPath, "depends") var desc *pkginfo.Desc var deps *pkginfo.Deps for desc == nil || deps == nil { if hdr, err = tarMustNext(r.Next()); err != nil { return } if hdr.Typeflag != tar.TypeReg && hdr.Typeflag != tar.TypeRegA { err = errors.New(fmt.Sprintf("expected file, found %d (%s)", hdr.Typeflag, hdr.Name)) return } pkgReader := paclist.NewReader(r) switch hdr.Name { default: err = errors.New(fmt.Sprintf("expected %s | %s, found %s", descPath, depsPath, hdr.Name)) case descPath: desc, err = pkginfo.DecodeDesc(pkgReader) case depsPath: deps, err = pkginfo.DecodeDeps(pkgReader) } if err != nil { err = errors.New(fmt.Sprintf("couldn't load package %s: %s", dirPath, err)) return } } return &pkginfo.Package{*desc, *deps}, nil }
// untar unpacks the tar stream tr beneath path, restoring ownership and
// permissions from the headers. Every target path is logged.
//
// Directory entries are created if they do not exist yet and then chowned
// and chmodded. Every other entry is created as a file, chowned, chmodded
// and filled from r, after which the access and modification times from the
// header are applied. Each file is closed as soon as it has been written so
// that descriptors do not pile up over a large archive.
//
// NOTE(review): file contents are read from r, not from tr. This presumably
// expects r to yield the current entry's bytes (tr itself, or a wrapper
// around it); confirm against the callers.
//
// The first failure aborts extraction and is returned.
func untar(tr *tar.Reader, r io.Reader, path string) error {
	for {
		hdr, err := tr.Next()
		if err == io.EOF {
			return nil
		}
		if err != nil {
			return err
		}

		target := filepath.Join(path, hdr.Name)
		info := hdr.FileInfo()
		_, statErr := os.Lstat(target)
		log.Print(target)

		if info.IsDir() {
			if os.IsNotExist(statErr) {
				if err := os.Mkdir(target, info.Mode()); err != nil {
					return err
				}
			}
			if err := os.Chown(target, hdr.Uid, hdr.Gid); err != nil {
				return err
			}
			if err := os.Chmod(target, info.Mode()); err != nil {
				return err
			}
			continue
		}

		file, err := os.Create(target)
		if err != nil {
			return err
		}
		err = func() error {
			if err := file.Chown(hdr.Uid, hdr.Gid); err != nil {
				return err
			}
			if err := file.Chmod(info.Mode()); err != nil {
				return err
			}
			_, err := io.Copy(file, r)
			return err
		}()
		// Close on every path; only surface the close error when nothing
		// went wrong earlier.
		if closeErr := file.Close(); err == nil {
			err = closeErr
		}
		if err != nil {
			return err
		}

		// Times are only restored for files (doesn't work for directories?).
		if err := os.Chtimes(target, hdr.AccessTime, hdr.ModTime); err != nil {
			return err
		}
	}
}
// tarCopy copies every entry, header and contents, from r to w. It returns
// nil once r is exhausted, or the first read or write error. w is not
// closed.
func tarCopy(w *tar.Writer, r *tar.Reader) error {
	for {
		hdr, err := r.Next()
		switch {
		case err == io.EOF:
			// End of tar archive.
			return nil
		case err != nil:
			return err
		}

		if err = w.WriteHeader(hdr); err != nil {
			return err
		}
		if _, err = io.Copy(w, r); err != nil {
			return err
		}
	}
}