// expandGlobs recursively expands globs in a filepath. It assumes the paths
// are already cleaned and normalized (i.e., absolute).
func expandGlobs(client *hdfs.Client, globbedPath string) ([]string, error) {
	parts := strings.Split(globbedPath, "/")[1:]
	var res []string
	var splitAt int

	// Find the first path component that contains a glob pattern.
	for splitAt = range parts {
		if hasGlob(parts[splitAt]) {
			break
		}
	}

	// Split the path into the literal prefix (base), the glob component
	// itself, the component after it (next), and everything else (remainder).
	// next and remainder stay empty if the glob is the last component.
	var base, glob, next, remainder string
	base = "/" + path.Join(parts[:splitAt]...)
	glob = parts[splitAt]
	if len(parts) > splitAt+1 {
		next = parts[splitAt+1]
		remainder = path.Join(parts[splitAt+2:]...)
	}

	list, err := client.ReadDir(base)
	if err != nil {
		return nil, err
	}

	for _, fi := range list {
		// A malformed pattern is treated as a non-match.
		match, _ := path.Match(glob, fi.Name())
		if !match {
			continue
		}

		// If the next component is a literal, check that it exists under
		// this match before recursing, pruning dead branches early.
		if !hasGlob(next) {
			_, err := client.Stat(path.Join(base, fi.Name(), next))
			if err != nil && !os.IsNotExist(err) {
				return nil, err
			} else if os.IsNotExist(err) {
				continue
			}
		}

		newPath := path.Join(base, fi.Name(), next, remainder)
		if hasGlob(newPath) {
			children, err := expandGlobs(client, newPath)
			if err != nil {
				return nil, err
			}
			res = append(res, children...)
		} else {
			res = append(res, newPath)
		}
	}

	return res, nil
}
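Both the match test and the recursion above lean on a hasGlob helper that isn't shown here. A minimal sketch, assuming (as path.Match's syntax suggests) that an unescaped `[`, `*`, or `?` marks a component as a glob:

import "regexp"

// globRegexp matches an unescaped glob metacharacter: [, *, or ?.
// The exact metacharacter set is an assumption based on path.Match.
var globRegexp = regexp.MustCompile(`([^\\]|^)[[*?]`)

// hasGlob reports whether the path fragment contains glob syntax.
func hasGlob(fragment string) bool {
	return globRegexp.MatchString(fragment)
}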
// RecurseInfos walks hdfsDir recursively and returns a HadoopFile for every
// regular file found beneath it.
func RecurseInfos(hdfsClient *hdfs.Client, hdfsDir string) ([]HadoopFile, error) {
	hdfsFiles := make([]HadoopFile, 0)

	dirs, err := hdfsClient.ReadDir(hdfsDir)
	if err != nil {
		return nil, err
	}

	for _, fileInfo := range dirs {
		if fileInfo.IsDir() {
			// Join with path.Join rather than naive concatenation, so the
			// function also works when hdfsDir lacks a trailing slash.
			childDir := path.Join(hdfsDir, fileInfo.Name()) + "/"
			recurseInfos, err := RecurseInfos(hdfsClient, childDir)
			if err != nil {
				return nil, err
			}
			hdfsFiles = append(hdfsFiles, recurseInfos...)
		} else {
			hdfsFiles = append(hdfsFiles, HadoopFile{dirPrefix: hdfsDir, fileInfo: fileInfo})
		}
	}

	return hdfsFiles, nil
}
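RecurseInfos builds on a HadoopFile type that isn't defined here. A minimal sketch of what it presumably looks like, plus a usage example; the FullPath accessor and the import path for the hdfs client (colinmarc/hdfs, inferred from the *hdfs.Client signature) are assumptions:

import (
	"fmt"
	"log"
	"os"
	"path"

	"github.com/colinmarc/hdfs"
)

// HadoopFile pairs a file's metadata with the directory it was listed from.
// A minimal sketch; the real type may carry more fields.
type HadoopFile struct {
	dirPrefix string      // directory containing the file, with trailing slash
	fileInfo  os.FileInfo // metadata as returned by ReadDir
}

// FullPath is a hypothetical convenience method reconstructing the file's
// absolute HDFS path from its prefix and name.
func (h HadoopFile) FullPath() string {
	return path.Join(h.dirPrefix, h.fileInfo.Name())
}

func main() {
	// Connect to the namenode and list every file under /user/data/.
	client, err := hdfs.New("namenode:8020")
	if err != nil {
		log.Fatal(err)
	}
	files, err := RecurseInfos(client, "/user/data/")
	if err != nil {
		log.Fatal(err)
	}
	for _, f := range files {
		fmt.Println(f.FullPath())
	}
}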