func main() { namenode := flag.String("namenode", "localhost:9000", "HDFS namenode address.") webhdfs := flag.String("webhdfs", "localhost:50070", "WebHDFS address.") user := flag.String("user", "", "HDFS username. Could be empty.") flag.Parse() fs.HookupHDFS(*namenode, *webhdfs, *user) dir := path.Join(fmt.Sprintf("/hdfs/tmp/test/github.com/wangkuiyi/file/%v", time.Now().UnixNano())) file := path.Join(dir, "hello.txt") content := "Hello World!\n" if e := fs.Mkdir(dir); e != nil { log.Panicf("Mkdir(%v) failed", dir) } if w, e := fs.Create(file); e != nil { log.Panicf("Create(%v) failed", file) } else { fmt.Fprintf(w, content) w.Close() } if _, e := fs.Stat(file); os.IsNotExist(e) { log.Panicf("Expecting file exists, but not") } if r, e := fs.Open(file); e == nil { b, _ := ioutil.ReadAll(r) fmt.Println(string(b)) r.Close() } }
func BenchmarkBuildVocabAndVSharder(t *testing.B) { f, e := fs.Open("testdata/internet-zh.num") if e != nil { t.Skip(e) } defer f.Close() _, _, e = BuildVocabAndVSharder(f, 10, true) if e != nil { t.Skip(e) } }
func main() { freq := flag.String("tf", "", "Token frequency list. Each line consists of frequence and token.") shards := flag.Int("s", 10, "Hint of number of vshards. Might be less.") delUnbalanced := flag.Bool("d", true, "Delete singular and unbalanced vshards.") vocab := flag.String("vocab", "", "Gob encoded Vocab file") vshdr := flag.String("vshdr", "", "Gob encoded VSharder file.") flag.Parse() f, e := fs.Open(*freq) if e != nil { log.Fatal(e) } defer f.Close() v, e := fs.Create(*vocab) if e != nil { log.Fatal(e) } defer v.Close() s, e := fs.Create(*vshdr) if e != nil { log.Fatal(e) } defer s.Close() { vocab, vshdr, e := algo.BuildVocabAndVSharder(f, *shards, *delUnbalanced) if e != nil { log.Fatal(e) } fmt.Printf("Generated %v shards\n", vshdr.Num()) if e := gob.NewEncoder(v).Encode(vocab); e != nil { log.Fatal(e) } if e := gob.NewEncoder(s).Encode(vshdr); e != nil { log.Fatal(e) } } }