// testFileList tests the decoding of a list of files against their // expected error and md5sum. func testFileList(t *testing.T, files []testFile, reuseReader bool) { var r *xz.Reader var err error if reuseReader { r, err = xz.NewReader(nil, 0) } for _, f := range files { func() { var fr *os.File fr, err = openTestFile(f.file) if err != nil { t.Fatal(err) } defer fr.Close() hash := md5.New() switch reuseReader { case true: err = r.Reset(fr) case false: r, err = xz.NewReader(fr, 0) } if err == nil { _, err = io.Copy(hash, r) } if err != f.err { t.Fatalf("%s: wanted error: %v, got: %v\n", f.file, f.err, err) } md5sum := fmt.Sprintf("%x", hash.Sum(nil)) if f.md5sum != md5sum { t.Fatalf( "%s: wanted md5: %v, got: %v\n", f.file, f.md5sum, md5sum) } }() } }
// testFileList tests the decoding of a list of files against their // expected error and md5sum. func testFileList(t *testing.T, dir string, files []testFile) { for _, f := range files { func() { fr, err := os.Open(filepath.Join("testdata", dir, f.file)) if err != nil { t.Fatal(err) } defer fr.Close() hash := md5.New() r, err := xz.NewReader(fr, 0) if err != nil { t.Fatal(err) } _, err = io.Copy(hash, r) if err != f.err { t.Fatalf("%s: wanted error: %v, got: %v\n", f.file, f.err, err) } md5sum := fmt.Sprintf("%x", hash.Sum(nil)) if f.md5sum != md5sum { t.Fatalf( "%s: wanted md5: %v, got: %v\n", f.file, f.md5sum, md5sum) } }() } }
func uncompress(pathToFile string) (*os.File, error) { var r io.Reader fh, err := os.Open(pathToFile) if err != nil { return nil, err } defer fh.Close() switch path.Ext(pathToFile) { case ".bz2": r = bzip2.NewReader(fh) case ".gz": r, err = gzip.NewReader(fh) if err != nil { return nil, err } case ".xz": r, err = xz.NewReader(fh, xz.DefaultDictMax) if err != nil { return nil, err } default: return nil, fmt.Errorf("%s has wrong file extension, currently supported %s", pathToFile, ".bz2, .gz, .xz") } tmpFile, err := ioutil.TempFile("", "") if err != nil { return nil, err } defer tmpFile.Close() if _, err := io.Copy(tmpFile, r); err != nil { return nil, err } return tmpFile, nil }
// testFileListByteReads tests the decoding of a list of files against // their expected error and md5sum. It uses a one byte input buffer // and one byte output buffer for each run of the decoder. func testFileListByteReads(t *testing.T, files []testFile) { for _, f := range files { func() { fr, err := openTestFile(f.file) if err != nil { t.Fatal(err) } defer fr.Close() hash := md5.New() obr := iotest.OneByteReader(fr) r, err := xz.NewReader(obr, 0) if err == nil { b := make([]byte, 1) var n int for err == nil { n, err = r.Read(b) if n == 1 { _, _ = hash.Write(b) } } if err == io.EOF { err = nil } } if err != f.err { t.Fatalf("%s: wanted error: %v, got: %v\n", f.file, f.err, err) } md5sum := fmt.Sprintf("%x", hash.Sum(nil)) if f.md5sum != md5sum { t.Fatalf( "%s: wanted md5: %v, got: %v\n", f.file, f.md5sum, md5sum) } }() } }
func TestMultipleBadReads(t *testing.T) { data, err := readTestFile("good-2-lzma2-corrupt.xz") if err != nil { t.Fatal(err) } r, err := xz.NewReader(bytes.NewReader(data), 0) if err != nil { t.Fatal(err) } b := make([]byte, 100) n, err := r.Read(b) if n != 6 || err != xz.ErrData { t.Fatalf("Read returned: (%d,%v), expected: (6,%v)\n", n, err, xz.ErrData) } n, err = r.Read(b) if n != 0 || err != xz.ErrData { t.Fatalf("Read returned: (%d,%v), expected: (0,%v)\n", n, err, xz.ErrData) } n, err = r.Read(b) if n != 0 || err != xz.ErrData { t.Fatalf("Read returned: (%d,%v), expected: (0,%v)\n", n, err, xz.ErrData) } }
// test to ensure that decoder errors are not returned prematurely // the test file returns 6 decoded bytes before corruption occurs func TestPrematureError(t *testing.T) { data, err := ioutil.ReadFile( filepath.Join("testdata", "other", "good-2-lzma2-corrupt.xz")) if err != nil { t.Fatal(err) } r, err := xz.NewReader(bytes.NewReader(data), 0) if err != nil { t.Fatal(err) } b := make([]byte, 2) n, err := r.Read(b) if n != 2 || err != nil { t.Fatalf("Read returned: (%d,%v), expected: (2,%v)\n", n, err, nil) } n, err = r.Read(b) if n != 2 || err != nil { t.Fatalf("Read returned: (%d,%v), expected: (2,%v)\n", n, err, nil) } n, err = r.Read(b) if n != 2 || err != xz.ErrData { t.Fatalf("Read returned: (%d,%v), expected: (2,%v)\n", n, err, xz.ErrData) } }
func OpenFile(f *os.File) (reader io.Reader, err error) { filename := f.Name() if strings.HasSuffix(filename, ".gz") { // handle gzip gf, err := gzip.NewReader(f) if err != nil { f.Seek(0, 0) reader = f } else { reader = gf defer gf.Close() } } else if strings.HasSuffix(filename, ".bz2") { // handle bz2 -- no bzip2.Close() or error return... bf := bzip2.NewReader(f) reader = bf } else if strings.HasSuffix(filename, ".xz") { // handle xz xf, err := xz.NewReader(f, 0) if err != nil { log.Printf("error reading .xz file = %s, skipping...\n", err) return reader, err } else { reader = xf // XXX xz has no xz.Close() } } else { // just a plain file reader = f } return reader, nil }
// Need to uncompress the file to be able to generate the Image ID func (r Registry) uncompress() error { acifile, err := os.Open(r.tmppath()) if err != nil { return err } defer acifile.Close() typ, err := aci.DetectFileType(acifile) if err != nil { return err } // In case DetectFileType changed the cursor _, err = acifile.Seek(0, 0) if err != nil { return err } var in io.Reader switch typ { case aci.TypeGzip: in, err = gzip.NewReader(acifile) if err != nil { return err } case aci.TypeBzip2: in = bzip2.NewReader(acifile) case aci.TypeXz: in, err = xz.NewReader(acifile, 0) if err != nil { return err } case aci.TypeTar: in = acifile case aci.TypeText: return fmt.Errorf("downloaded ACI is text, not a tarball") case aci.TypeUnknown: return fmt.Errorf("downloaded ACI is of an unknown type") } out, err := os.OpenFile(r.tmpuncompressedpath(), os.O_CREATE|os.O_WRONLY|os.O_TRUNC, 0644) if err != nil { return err } defer out.Close() _, err = io.Copy(out, in) if err != nil { return fmt.Errorf("error copying: %v", err) } err = out.Sync() if err != nil { return fmt.Errorf("error writing: %v", err) } return nil }
func process_pcap(filename string) (times tf_time.Times, err error) { var reader io.Reader f, err := os.Open(filename) if err != nil { return times, err } defer f.Close() if strings.Contains(filename, ".gz") { // handle gzip gf, err := gzip.NewReader(f) if err != nil { f.Seek(0, 0) reader = f } else { reader = gf defer gf.Close() } } else if strings.Contains(filename, ".xz") { // handle xz xf, err := xz.NewReader(f, 0) if err != nil { log.Printf("error reading .xz file = %s, skipping...\n", err) return times, err } else { reader = xf // XXX xz has no xz.Close() } } else { // just a plain, raw .pcap file reader = f } // start reading pcap pf, err := pcap.NewReader(reader) if err != nil { return times, err } for { ff, _ := pf.ReadFrame() if ff == nil { break } when := ff.Time() t := when.UTC() if times.Earliest.IsZero() || t.Before(times.Earliest) { times.Earliest = t } if times.Latest.IsZero() || t.After(times.Latest) { times.Latest = t } } return times, nil }
func (l *XZByteSource) Open() (io.ReadCloser, error) { f, err := l.Inner.Open() if err != nil { return nil, err } r, err := xz.NewReader(f, 0) if err != nil { loggedClose(f, l.Inner.Name()) return nil, fmt.Errorf("error opening file for XZ decompression (%s): %v", l.Name(), err) } return HookedCloser(r, f), nil }
func ExampleReader_Multistream() { // load some XZ data into memory data, err := ioutil.ReadFile( filepath.Join("testdata", "xz-utils", "good-1-check-sha256.xz")) if err != nil { log.Fatal(err) } // create a MultiReader that will read the data twice mr := io.MultiReader(bytes.NewReader(data), bytes.NewReader(data)) // create an xz.Reader from the MultiReader r, err := xz.NewReader(mr, 0) if err != nil { log.Fatal(err) } // set Multistream mode to false r.Multistream(false) // decompress the first stream _, err = io.Copy(os.Stdout, r) if err != nil { log.Fatal(err) } fmt.Println("Read first stream") // reset the XZ reader so it is ready to read the second stream err = r.Reset(nil) if err != nil { log.Fatal(err) } // set Multistream mode to false again r.Multistream(false) // decompress the second stream _, err = io.Copy(os.Stdout, r) if err != nil { log.Fatal(err) } fmt.Println("Read second stream") // reset the XZ reader so it is ready to read further streams err = r.Reset(nil) // confirm that the second stream was the last one if err == io.EOF { fmt.Println("No more streams") } // Output: // Hello // World! // Read first stream // Hello // World! // Read second stream // No more streams }
func TestMemlimit(t *testing.T) { data, err := readTestFile("words.xz") if err != nil { t.Fatal(err) } r, err := xz.NewReader(bytes.NewReader(data), 1<<25) if err == nil { b := new(bytes.Buffer) _, err = io.Copy(b, r) } if err != xz.ErrMemlimit { t.Fatalf("wanted error: %v, got: %v\n", xz.ErrMemlimit, err) } }
func TestMultistream(t *testing.T) { files := []string{ "good-1-x86-lzma2-offset-2048.xz", "random-1mb.xz", "words.xz", "good-1-x86-lzma2-offset-2048.xz", "random-1mb.xz", "words.xz", } var readers []io.Reader for _, f := range files { data, err := readTestFile(f) if err != nil { t.Fatal(err) } readers = append(readers, bytes.NewReader(data)) } mr := io.MultiReader(readers...) r, err := xz.NewReader(mr, 0) if err != nil { t.Fatal(err) } for i, f := range files { r.Multistream(false) hash := md5.New() _, err = io.Copy(hash, r) if err != nil { t.Fatalf("%s: wanted copy error: %v, got: %v\n", f, nil, err) } md5sum := fmt.Sprintf("%x", hash.Sum(nil)) wantedMD5, _ := testFileData(f) if wantedMD5 != md5sum { t.Fatalf( "%s: wanted md5: %v, got: %v\n", f, wantedMD5, md5sum) } err = r.Reset(nil) var wantedErr error switch { case i < len(files)-1: wantedErr = nil case i == len(files)-1: wantedErr = io.EOF } if wantedErr != err { t.Fatalf("%s: wanted reset error: %v, got: %v\n", f, wantedErr, err) } } }
func TestMemlimit(t *testing.T) { data, err := ioutil.ReadFile( filepath.Join("testdata", "other", "pg2242.txt.xz")) if err != nil { t.Fatal(err) } r, err := xz.NewReader(bytes.NewReader(data), 1<<25) if err != nil { t.Fatal(err) } b := new(bytes.Buffer) _, err = io.Copy(b, r) if err != xz.ErrMemlimit { t.Fatalf("wanted error: %v, got: %v\n", xz.ErrMemlimit, err) } }
// Open a xz compressed archive and import it. // Uses the "xi2.org/x/xz" package to read xz files. func ExampleImport_xz() { r, err := os.Open("rockyou.txt.xz") if err != nil { // Fake it fmt.Println("Imported", 9341543, "items") return } xzr, err := xz.NewReader(r, 0) if err != nil { panic(err) } mem := testdb.NewMemDBBulk() in := tokenizer.NewLine(xzr) err = Import(in, mem, nil) if err != nil { panic(err) } fmt.Println("Imported", len(*mem), "items") // Output: Imported 9341543 items }
func ExampleNewReader() { // load some XZ data into memory data, err := ioutil.ReadFile( filepath.Join("testdata", "xz-utils", "good-1-check-sha256.xz")) if err != nil { log.Fatal(err) } // create an xz.Reader to decompress the data r, err := xz.NewReader(bytes.NewReader(data), 0) if err != nil { log.Fatal(err) } // write the decompressed data to os.Stdout _, err = io.Copy(os.Stdout, r) if err != nil { log.Fatal(err) } // Output: // Hello // World! }
// TestReuseReaderPartialReads repeatedly tests decoding a file with a // reused Reader that has been used immediately before to partially // decode a file. The amount of partial decoding before the full // decode is varied on each loop iteration. func TestReuseReaderPartialReads(t *testing.T) { data, err := readTestFile("words.xz") if err != nil { t.Fatal(err) } z, err := xz.NewReader(nil, 0) if err != nil { t.Fatal(err) } for i := 0; i <= 80000; i += 10000 { err = z.Reset(bytes.NewReader(data)) if err != nil { t.Fatal(err) } b := make([]byte, i) _, err = io.ReadFull(z, b) if err != nil { t.Fatalf("io.ReadFull: wanted error: %v, got: %v\n", nil, err) } err = z.Reset(bytes.NewReader(data)) if err != nil { t.Fatal(err) } hash := md5.New() _, err = io.Copy(hash, z) if err != nil { t.Fatalf("io.Copy: wanted error: %v, got: %v\n", nil, err) } md5sum := fmt.Sprintf("%x", hash.Sum(nil)) wantedMD5, _ := testFileData("words.xz") if wantedMD5 != md5sum { t.Fatalf( "hash.Sum: wanted md5: %v, got: %v\n", wantedMD5, md5sum) } } }
func StreamCSV(csvChannel chan<- map[string]string, filename string) { var err error // open compressed csv file var file *os.File if file, err = os.Open(filename); err != nil { log.Fatal(err) } defer file.Close() var ioReader io.Reader filename = strings.ToLower(filename) var csvReader *csv.Reader if strings.HasSuffix(filename, ".bz2") { ioReader = bzip2.NewReader(file) csvReader = csv.NewReader(ioReader) } else if strings.HasSuffix(filename, ".xz") { ioReader = bufio.NewReader(file) //if ioReader, err = xz.NewReader(ioReader, 0); err != nil { if ioReader, err = xz.NewReader(file, 0); err != nil { log.Fatal(err) } csvReader = csv.NewReader(ioReader) } else { // log.Fatal("input mush be copressed") //reader = bufio.NewReader(file) //reader = os.Open(file) // bufio.NewReader(file) csvReader = csv.NewReader(file) } // create csv reader csvReader.FieldsPerRecord = 0 // ident colum names colMapping := make(map[int]string) rec, err := csvReader.Read() if err != nil { log.Fatalf("ERROR: %v\n", err) } for key, value := range rec { colMapping[key] = value } for { rec, err := csvReader.Read() if err != nil { break } line := make(map[string]string) for key, value := range rec { if value == "" { continue } line[colMapping[key]] = value } csvChannel <- line } close(csvChannel) }
func process_fsdb(filename string, col int) (times tf_time.Times, err error) { var reader io.Reader f, err := os.Open(filename) if err != nil { return times, err } defer f.Close() if strings.Contains(filename, ".gz") { // handle gzip gf, err := gzip.NewReader(f) if err != nil { f.Seek(0, 0) reader = f } else { reader = gf defer gf.Close() } } else if strings.Contains(filename, ".xz") { // handle xz xf, err := xz.NewReader(f, 0) if err != nil { log.Printf("error reading .xz file = %s, skipping...\n", err) return times, err } else { reader = xf // XXX xz has no xz.Close() } } else { // just a plain .fsdb file reader = f } // now process files scanner := bufio.NewScanner(reader) for scanner.Scan() { // read line line := scanner.Text() if strings.HasPrefix(line, "#") { // if a comment or header, continue continue } // only want the column # "col" ts := strings.SplitN(line, "\t", col+1)[col-1] // convert unixtimestamp into golang time // accepts both second and nanosecond precision tm, err := tf_time.UnmarshalTime([]byte(ts)) if err != nil { return times, err } if times.Earliest.IsZero() || tm.Before(times.Earliest) { times.Earliest = tm } if times.Latest.IsZero() || tm.After(times.Latest) { times.Latest = tm } } return times, err }
// xzNewReader adapts xz.NewReader (with no memory limit) to a generic
// decompressor signature: io.Reader in, io.Reader out.
func xzNewReader(r io.Reader) (io.Reader, error) {
	zr, err := xz.NewReader(r, 0)
	return zr, err
}