// writeToOss copies a single HDFS file to OSS. When datePrefix is true, the
// object key is prefixed with the file's date stamp instead of the target dir.
func writeToOss(h HadoopFile, ossDir string, hdfsClient *hdfs.Client, ossClient *oss.Client, bucket string, datePrefix bool) error {
	log.Printf("Start to sync %s to ossDir %s\n", h.dirPrefix+h.fileInfo.Name(), ossDir)
	hdfsPath := h.dirPrefix + h.fileInfo.Name()
	timeStamp := fmt.Sprintf("%d", makeTimestamp())
	ossPath := ossDir + timeStamp + h.fileInfo.Name()
	if datePrefix {
		dateStr, err := getDateStamp(hdfsPath)
		if err != nil {
			log.Printf("get date info from hdfsPath %s failed!", hdfsPath)
			return err
		}
		ossPath = dateStr + "/" + timeStamp + h.fileInfo.Name()
	}
	reader, err := hdfsClient.Open(hdfsPath)
	if err != nil {
		log.Printf("hdfsClient open failed! err message: %s\n", err)
		return err
	}
	defer reader.Close()
	err = ossClient.PutObject(bucket, ossPath, reader, nil)
	if err != nil {
		log.Printf("Oss Put Object failed %s!\n", err)
		return err
	}
	log.Printf("Finished sync %s to ossDir %s\n", h.dirPrefix+h.fileInfo.Name(), ossDir)
	return nil
}
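// writeToOss depends on two helpers that are not shown here. The sketches
// below are assumptions, not the original implementations: makeTimestamp is
// taken to be the usual epoch-milliseconds helper (stdlib "time"), and
// getDateStamp is guessed to pull a date-like segment out of the HDFS path
// (stdlib "regexp").
func makeTimestamp() int64 {
	// Milliseconds since the Unix epoch, used to keep OSS object keys unique.
	return time.Now().UnixNano() / int64(time.Millisecond)
}

var dateSegment = regexp.MustCompile(`\d{4}-?\d{2}-?\d{2}`)

func getDateStamp(hdfsPath string) (string, error) {
	// Assumed behavior: return the first date-like path segment, e.g.
	// "/logs/2016-01-02/app.log" -> "2016-01-02".
	if m := dateSegment.FindString(hdfsPath); m != "" {
		return m, nil
	}
	return "", fmt.Errorf("no date segment in path %s", hdfsPath)
}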
// walkDir recursively visits every entry under dir, calling visit for each.
func walkDir(client *hdfs.Client, dir string, visit walkFunc) error {
	dirReader, err := client.Open(dir)
	if err != nil {
		return err
	}
	defer dirReader.Close()
	var partial []os.FileInfo
	for ; err != io.EOF; partial, err = dirReader.Readdir(100) {
		if err != nil {
			return err
		}
		for _, child := range partial {
			childPath := path.Join(dir, child.Name())
			visit(childPath, child)
			if child.IsDir() {
				err = walkDir(client, childPath, visit)
				if err != nil {
					return err
				}
			}
		}
	}
	return nil
}
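// The walkFunc type used by walkDir is not defined in this file. A minimal
// sketch matching the call sites above (a path plus its os.FileInfo, with no
// error return, so visit cannot abort the walk):
type walkFunc func(path string, info os.FileInfo)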
// findStacktrace reads a log file from HDFS and returns its contents with
// every line matching a token in logsToSkip filtered out.
func findStacktrace(client *hdfs.Client, name string) (string, error) {
	log.Println("Reading", name)
	file, err := client.Open(name)
	if err != nil {
		return "", err
	}
	defer file.Close()
	data, err := ioutil.ReadAll(file)
	if err != nil {
		return "", err
	}
	var logs [][]byte
	lines := bytes.SplitAfter(data, []byte("\n"))
	for _, line := range lines {
		matched := false
		for _, token := range logsToSkip {
			if bytes.Contains(line, token) {
				matched = true
				break
			}
		}
		if !matched {
			logs = append(logs, line)
		}
	}
	log.Println("Finished", name)
	return string(bytes.Join(logs, nil)), nil
}
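// logsToSkip is referenced but not defined here. A plausible sketch, assuming
// the goal is to drop routine log lines and keep stacktrace material; the
// actual token list is unknown:
var logsToSkip = [][]byte{
	[]byte(" INFO "),
	[]byte(" DEBUG "),
}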
// expandGlobs recursively expands globs in a filepath. It assumes the paths
// are already cleaned and normalized (ie, absolute).
func expandGlobs(client *hdfs.Client, globbedPath string) ([]string, error) {
	parts := strings.Split(globbedPath, "/")[1:]
	var res []string
	var splitAt int
	for splitAt = range parts {
		if hasGlob(parts[splitAt]) {
			break
		}
	}
	var base, glob, next, remainder string
	base = "/" + path.Join(parts[:splitAt]...)
	glob = parts[splitAt]
	if len(parts) > splitAt+1 {
		next = parts[splitAt+1]
		remainder = path.Join(parts[splitAt+2:]...)
	} else {
		next = ""
		remainder = ""
	}
	list, err := client.ReadDir(base)
	if err != nil {
		return nil, err
	}
	for _, fi := range list {
		match, _ := path.Match(glob, fi.Name())
		if !match {
			continue
		}
		if !hasGlob(next) {
			// Skip candidates whose next fixed component doesn't exist.
			_, err := client.Stat(path.Join(base, fi.Name(), next))
			if os.IsNotExist(err) {
				continue
			} else if err != nil {
				return nil, err
			}
		}
		newPath := path.Join(base, fi.Name(), next, remainder)
		if hasGlob(newPath) {
			children, err := expandGlobs(client, newPath)
			if err != nil {
				return nil, err
			}
			res = append(res, children...)
		} else {
			res = append(res, newPath)
		}
	}
	return res, nil
}
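// hasGlob is assumed to report whether a path fragment contains glob
// metacharacters. A minimal sketch consistent with path.Match's syntax
// (*, ?, and character classes in brackets):
func hasGlob(fragment string) bool {
	return strings.ContainsAny(fragment, `*?[`)
}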
// HadoopReadWriteGzipFile streams an HDFS file into the writer wrapped by f
// and returns the number of bytes copied.
func HadoopReadWriteGzipFile(f F, h HadoopFile, hdfsClient *hdfs.Client) (int64, error) {
	hdfsPath := h.dirPrefix + h.fileInfo.Name()
	hdfsReader, err := hdfsClient.Open(hdfsPath)
	if err != nil {
		log.Printf("hdfsClient open failed! err message: %s\n", err)
		return 0, err
	}
	defer hdfsReader.Close()
	log.Printf("start to use io.Copy\n")
	written, err := io.Copy(f.fw, hdfsReader)
	return written, err
}
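// Neither F nor HadoopFile is defined in this file. Sketches inferred from
// how they are used above (assumptions, not the original definitions):
type F struct {
	fw io.Writer // destination writer, e.g. a *gzip.Writer over a local file
}

type HadoopFile struct {
	dirPrefix string      // parent directory, including the trailing slash
	fileInfo  os.FileInfo // metadata for the file itself
}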
func moveTo(client *hdfs.Client, source, dest string, force bool) {
	if force {
		err := client.Remove(dest)
		if err != nil && !os.IsNotExist(err) {
			fatal(err)
		}
	}
	err := client.Rename(source, dest)
	if err != nil {
		fatal(err)
	}
}
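// fatal is not shown here. In CLI code like this it is conventionally a
// print-and-exit helper; a minimal sketch:
func fatal(v ...interface{}) {
	fmt.Fprintln(os.Stderr, v...)
	os.Exit(1)
}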
func walk(client *hdfs.Client, root string, visit walkFunc) error {
	rootInfo, err := client.Stat(root)
	if err != nil {
		return err
	}
	visit(root, rootInfo)
	if rootInfo.IsDir() {
		err = walkDir(client, root, visit)
		if err != nil {
			return err
		}
	}
	return nil
}
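// Example use of walk, assuming the walkFunc sketch above: total the size of
// every regular file under root. The client must already be connected.
func totalSize(client *hdfs.Client, root string) (int64, error) {
	var total int64
	err := walk(client, root, func(p string, fi os.FileInfo) {
		if !fi.IsDir() {
			total += fi.Size()
		}
	})
	return total, err
}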
// RecurseInfos lists every regular file under hdfsDir (which must end in a
// slash), descending into subdirectories.
func RecurseInfos(hdfsClient *hdfs.Client, hdfsDir string) ([]HadoopFile, error) {
	hdfsFiles := make([]HadoopFile, 0)
	dirs, err := hdfsClient.ReadDir(hdfsDir)
	if err != nil {
		return nil, err
	}
	for _, fileInfo := range dirs {
		if fileInfo.IsDir() {
			recurseInfos, err := RecurseInfos(hdfsClient, hdfsDir+fileInfo.Name()+"/")
			if err != nil {
				return nil, err
			}
			hdfsFiles = append(hdfsFiles, recurseInfos...)
		} else {
			hdfsFiles = append(hdfsFiles, HadoopFile{dirPrefix: hdfsDir, fileInfo: fileInfo})
		}
	}
	return hdfsFiles, nil
}
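// Example: pair RecurseInfos with writeToOss to mirror an HDFS tree into OSS.
// The bucket name here is a placeholder, and the trailing slash on hdfsDir is
// required by RecurseInfos's path concatenation.
func syncDirToOss(hdfsClient *hdfs.Client, ossClient *oss.Client, hdfsDir, ossDir string) error {
	files, err := RecurseInfos(hdfsClient, hdfsDir)
	if err != nil {
		return err
	}
	for _, f := range files {
		// false: key objects under ossDir rather than under a date prefix.
		if err := writeToOss(f, ossDir, hdfsClient, ossClient, "my-bucket", false); err != nil {
			return err
		}
	}
	return nil
}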
// duDir prints the size of every entry under dir and returns the directory's
// total size, recursing into subdirectories.
func duDir(client *hdfs.Client, tw *tabwriter.Writer, dir string, humanReadable bool) int64 {
	dirReader, err := client.Open(dir)
	if err != nil {
		fmt.Fprintln(os.Stderr, err)
		return 0
	}
	var partial []os.FileInfo
	var dirSize int64
	for ; err != io.EOF; partial, err = dirReader.Readdir(100) {
		if err != nil {
			fmt.Fprintln(os.Stderr, err)
			return dirSize
		}
		for _, child := range partial {
			childPath := path.Join(dir, child.Name())
			info, err := client.Stat(childPath)
			if err != nil {
				fmt.Fprintln(os.Stderr, err)
				return 0
			}
			var size int64
			if info.IsDir() {
				size = duDir(client, tw, childPath, humanReadable)
			} else {
				size = info.Size()
			}
			printSize(tw, size, childPath, humanReadable)
			dirSize += size
		}
	}
	return dirSize
}
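// printSize is referenced but not defined here. A minimal sketch that writes
// one tab-separated row per entry; the original's exact human-readable format
// is unknown:
func printSize(tw *tabwriter.Writer, size int64, name string, humanReadable bool) {
	if humanReadable {
		units := []string{"B", "K", "M", "G", "T"}
		s := float64(size)
		i := 0
		for s >= 1024 && i < len(units)-1 {
			s /= 1024
			i++
		}
		fmt.Fprintf(tw, "%.1f%s\t%s\n", s, units[i], name)
	} else {
		fmt.Fprintf(tw, "%d\t%s\n", size, name)
	}
}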
// moveTo (variant): stats dest first and refuses to overwrite an existing
// dest unless force is set, in which case an existing directory is removed.
func moveTo(client *hdfs.Client, source, dest string, force bool) {
	resp, err := client.Stat(dest)
	if force {
		if err == nil && resp.IsDir() {
			if err = client.Remove(dest); err != nil {
				fatal(err)
			}
		}
	} else if err == nil {
		fatal(&os.PathError{Op: "rename", Path: dest, Err: os.ErrExist})
	}
	err = client.Rename(source, dest)
	if err != nil {
		fatal(err)
	}
}
// printDir lists a directory in ls style: long for detailed rows, all to
// include the "." and ".." entries.
func printDir(client *hdfs.Client, dir string, long, all, humanReadable bool) {
	dirReader, err := client.Open(dir)
	if err != nil {
		fatal(err)
	}
	var tw *tabwriter.Writer
	if long {
		tw = lsTabWriter()
		defer tw.Flush()
	}
	if all {
		if long {
			dirInfo, err := client.Stat(dir)
			if err != nil {
				fatal(err)
			}
			parentPath := path.Join(dir, "..")
			parentInfo, err := client.Stat(parentPath)
			if err != nil {
				fatal(err)
			}
			printLong(tw, ".", dirInfo, humanReadable)
			printLong(tw, "..", parentInfo, humanReadable)
		} else {
			fmt.Println(".")
			fmt.Println("..")
		}
	}
	var partial []os.FileInfo
	for ; err != io.EOF; partial, err = dirReader.Readdir(100) {
		if err != nil {
			fatal(err)
		}
		printFiles(tw, partial, long, all, humanReadable)
	}
	// The deferred tw.Flush above covers the long case on return.
}
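// lsTabWriter, printLong, and printFiles are defined elsewhere. A sketch of
// the tab writer setup consistent with the aligned ls-style output above;
// the exact widths are assumptions:
func lsTabWriter() *tabwriter.Writer {
	// minwidth 3, tabwidth 8, two spaces of padding between columns.
	return tabwriter.NewWriter(os.Stdout, 3, 8, 2, ' ', 0)
}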