// fetchFile fetches data from the web server. It then sends it to a // tee, which on one side has an hash checksum reader, and on the other // a gunzip reader writing to a file. It will compare the hash // checksum after the copy is done. func fetchFile(srcUrl, srcHash, dstFilename string) error { log.Infof("fetchFile: starting to fetch %v from %v", dstFilename, srcUrl) // open the URL req, err := http.NewRequest("GET", srcUrl, nil) if err != nil { return fmt.Errorf("NewRequest failed for %v: %v", srcUrl, err) } // we set the 'gzip' encoding ourselves so the library doesn't // do it for us and ends up using go gzip (we want to use our own // cgzip which is much faster) req.Header.Set("Accept-Encoding", "gzip") resp, err := http.DefaultClient.Do(req) if err != nil { return err } if resp.StatusCode != 200 { return fmt.Errorf("failed fetching %v: %v", srcUrl, resp.Status) } defer resp.Body.Close() // see if we need some uncompression var reader io.Reader = resp.Body ce := resp.Header.Get("Content-Encoding") if ce != "" { if ce == "gzip" { gz, err := cgzip.NewReader(reader) if err != nil { return err } defer gz.Close() reader = gz } else { return fmt.Errorf("unsupported Content-Encoding: %v", ce) } } return uncompressAndCheck(reader, srcHash, dstFilename, strings.HasSuffix(srcUrl, ".gz")) }
// uncompressAndCheck uses the provided reader to read data, and then // sends it to a tee, which on one side has an hash checksum reader, // and on the other a gunzip reader writing to a file. It will // compare the hash checksum after the copy is done. func uncompressAndCheck(reader io.Reader, srcHash, dstFilename string, needsUncompress bool) error { // create destination directory dir, filePrefix := path.Split(dstFilename) if dirErr := os.MkdirAll(dir, 0775); dirErr != nil { return dirErr } // create a temporary file to uncompress to dstFile, err := ioutil.TempFile(dir, filePrefix) if err != nil { return err } defer func() { // try to close and delete the file. // in the success case, the file will already be closed // and renamed, so all of this would fail anyway, no biggie dstFile.Close() os.Remove(dstFile.Name()) }() // create a buffering output dst := bufio.NewWriterSize(dstFile, 2*1024*1024) // create hash to write the compressed data to hasher := newHasher() // create a Tee: we split the HTTP input into the hasher // and into the gunziper tee := io.TeeReader(reader, hasher) // create the uncompresser var decompressor io.Reader if needsUncompress { gz, err := cgzip.NewReader(tee) if err != nil { return err } defer gz.Close() decompressor = gz } else { decompressor = tee } // see if we need to introduce failures if simulateFailures { failureCounter++ if failureCounter%10 == 0 { return fmt.Errorf("Simulated error") } } // copy the data. Will also write to the hasher if _, err = io.Copy(dst, decompressor); err != nil { return err } // check the hash hash := hasher.HashString() if srcHash != hash { return fmt.Errorf("hash mismatch for %v, %v != %v", dstFilename, srcHash, hash) } // we're good log.Infof("processed snapshot file: %v", dstFilename) dst.Flush() dstFile.Close() // atomically move uncompressed file if err := os.Chmod(dstFile.Name(), 0664); err != nil { return err } return os.Rename(dstFile.Name(), dstFilename) }