func walkDir(client *hdfs.Client, dir string, visit walkFunc) error { dirReader, err := client.Open(dir) if err != nil { return err } var partial []os.FileInfo for ; err != io.EOF; partial, err = dirReader.Readdir(100) { if err != nil { return err } for _, child := range partial { childPath := path.Join(dir, child.Name()) visit(childPath, child) if child.IsDir() { err = walkDir(client, childPath, visit) if err != nil { return err } } } } return nil }
func findStacktrace(client *hdfs.Client, name string) (string, error) { log.Println("Reading", name) file, err := client.Open(name) if err != nil { return "", err } data, err := ioutil.ReadAll(file) if err != nil { return "", err } var logs [][]byte lines := bytes.SplitAfter(data, []byte("\n")) for _, line := range lines { matched := false for _, token := range logsToSkip { if bytes.Contains(line, token) { matched = true break } } if !matched { logs = append(logs, line) } } log.Println("Finished", name) return string(bytes.Join(logs, nil)), nil }
func writeToOss(h HadoopFile, ossDir string, hdfsClient *hdfs.Client, ossClient *oss.Client, bucket string, b bool) error { log.Printf("Start to sync %s to ossDir %s\n", h.dirPrefix+h.fileInfo.Name(), ossDir) hdfsPath := h.dirPrefix + h.fileInfo.Name() timeStamp := fmt.Sprintf("%d", makeTimestamp()) ossPath := ossDir + timeStamp + h.fileInfo.Name() if b { dateStr, err := getDateStamp(hdfsPath) if err != nil { log.Printf("get date info from hdfsPath %s failed!", hdfsPath) return err } ossPath = dateStr + "/" + timeStamp + h.fileInfo.Name() if err != nil { return err } } reader, err := hdfsClient.Open(hdfsPath) if err != nil { log.Printf("hdfsClient opend failed! err message: %s\n", err) return err } defer reader.Close() err = ossClient.PutObject(bucket, ossPath, reader, nil) if err != nil { log.Printf("Oss Append Object failed %s!\n", err) return err } log.Printf("Finished sync %s to ossDir \n", h.dirPrefix+h.fileInfo.Name(), ossDir) return nil }
func HadoopReadWriteGzipFile(f F, h HadoopFile, hdfsClient *hdfs.Client) (int64, error) { hdfsPath := h.dirPrefix + h.fileInfo.Name() hdfsReader, err := hdfsClient.Open(hdfsPath) if err != nil { log.Printf("hdfsClient opend failed! err message: %s\n", err) return 0, err } defer hdfsReader.Close() log.Printf("start to use io Copy \n") written, err := io.Copy(f.fw, hdfsReader) return written, err }
func printDir(client *hdfs.Client, dir string, long, all, humanReadable bool) { dirReader, err := client.Open(dir) if err != nil { fatal(err) } var tw *tabwriter.Writer if long { tw = lsTabWriter() defer tw.Flush() } if all { if long { dirInfo, err := client.Stat(dir) if err != nil { fatal(err) } parentPath := path.Join(dir, "..") parentInfo, err := client.Stat(parentPath) if err != nil { fatal(err) } printLong(tw, ".", dirInfo, humanReadable) printLong(tw, "..", parentInfo, humanReadable) } else { fmt.Println(".") fmt.Println("..") } } var partial []os.FileInfo for ; err != io.EOF; partial, err = dirReader.Readdir(100) { if err != nil { fatal(err) } printFiles(tw, partial, long, all, humanReadable) } if long { tw.Flush() } }
func duDir(client *hdfs.Client, tw *tabwriter.Writer, dir string, humanReadable bool) int64 { dirReader, err := client.Open(dir) if err != nil { fmt.Fprintln(os.Stderr, err) return 0 } var partial []os.FileInfo var dirSize int64 for ; err != io.EOF; partial, err = dirReader.Readdir(100) { if err != nil { fmt.Fprintln(os.Stderr, err) return dirSize } for _, child := range partial { childPath := path.Join(dir, child.Name()) info, err := client.Stat(childPath) if err != nil { fmt.Fprintln(os.Stderr, err) return 0 } var size int64 if info.IsDir() { size = duDir(client, tw, childPath, humanReadable) } else { size = info.Size() } printSize(tw, size, childPath, humanReadable) dirSize += size } } return dirSize }