func main() { flag.Parse() f := flow.New() hdfs.Source( f, "hdfs://localhost:9000/etc", 8, ).Filter(func(line string) bool { // println("filter:", line) return !strings.HasPrefix(line, "#") }).Map(func(line string, ch chan string) { for _, token := range strings.Split(line, ":") { ch <- token } }).Map(func(key string) int { // println("map:", key) return 1 }).Reduce(func(x int, y int) int { // println("reduce:", x+y) return x + y }).Map(func(x int) { println("count:", x) }).Run() }
func testJoin() { reg, err := regexp.Compile("[^A-Za-z0-9]+") if err != nil { panic(err) } tokenizer := func(line string, ch chan string) { line = reg.ReplaceAllString(line, "-") for _, token := range strings.Split(line, "-") { ch <- strings.ToLower(token) } } f1 := flow.New() leftWords := f1.TextFile( "/etc/passwd", 3, ).Map(tokenizer).Map(func(t string) (string, int) { return t, 1 }).Sort(nil).LocalReduceByKey(func(x, y int) int { return x + y }) rightWords := f1.TextFile( "word_count.go", 3, ).Map(tokenizer).Map(func(t string) (string, int) { return t, 1 }).Sort(nil).LocalReduceByKey(func(x, y int) int { return x + y }) leftWords.Join(rightWords).Map(func(key string, left, right int) { println(key, ":", left, ":", right) }).Run() }
func init() { f1 = flow.New() users := f1.Source(func(out chan User) { iterate("mongodb://127.0.0.1", "example", "users", func(iter *mgo.Iter) { var user User for iter.Next(&user) { out <- user } }) }, 1).Map(func(user User) (string, User) { return user.Email, user }).Partition(3) posts := f1.Source(func(out chan Post) { iterate("mongodb://127.0.0.1", "example", "posts", func(iter *mgo.Iter) { var post Post for iter.Next(&post) { out <- post } }) }, 1).Map(func(post Post) (string, Post) { return post.Email, post }).Partition(3) users.CoGroup(posts).Map(func(email string, users []User, posts []Post) (User, []Post) { if len(users) > 0 { return users[0], posts } else { return User{}, posts } }).Filter(func(user User, posts []Post) bool { return user.LastSeenTime.Before(time.Now().AddDate(-1, 0, 0)) }).Map(func(user User, posts []Post) { var totalSize int for _, post := range posts { for _, att := range post.Attachments { totalSize += att.Size } for _, att := range post.Inlines { totalSize += att.Size } } if len(posts) > 0 { fmt.Print(posts[0].Id, " ", posts[0].Email) } fmt.Println(" user", user.Email, "posts", len(posts), "bytes", totalSize, "lastSeen", user.LastSeenTime) }) }
func testInputOutputChannels() { ch := make(chan int) f1 := flow.New() source := f1.Channel(ch) left := source.Map(func(t int) (int, int) { return t, t * 2 }) right := source.Map(func(t int) (int, int) { return t, t * 3 }) outChannel := make(chan struct { X, Y, Z int }) left.Join(right).AddOutput(outChannel) outInt := make(chan int) source.AddOutput(outInt) flow.Ready() var wg sync.WaitGroup goStart(&wg, func() { f1.Run() }) goStart(&wg, func() { for out := range outInt { fmt.Printf("source %d \n", out) } }) goStart(&wg, func() { for out := range outChannel { fmt.Printf("%d : %d\n", out.X, out.Y) } }) limit := 5 for i := 0; i < limit; i++ { ch <- i ch <- i } close(ch) wg.Wait() }
// main counts the ":"-separated tokens in the non-comment lines of
// /etc/passwd and prints the total.
func main() {
	counted := flow.New().TextFile(
		"/etc/passwd", 3,
	).Filter(func(record string) bool {
		// Ignore comment lines.
		return !strings.HasPrefix(record, "#")
	}).Map(func(record string, emit chan string) {
		for _, field := range strings.Split(record, ":") {
			emit <- field
		}
	}).Map(func(field string) int {
		// One per token.
		return 1
	}).Reduce(func(a int, b int) int {
		return a + b
	})

	counted.Map(func(total int) {
		println("count:", total)
	}).Run()
}
func testPartitionAndSort() { flow.New().TextFile( "/etc/hosts", 7, ).Partition( 2, ).Map(func(line string) string { return line }).Sort(func(a string, b string) bool { if strings.Compare(a, b) < 0 { return true } return false }).Map(func(line string) { println(line) }).Run() }
func testSelfJoin() { words := flow.New().TextFile( "/etc/passwd", 3, ).Filter(func(line string) bool { return !strings.HasPrefix(line, "#") }).Map(func(line string, ch chan string) { for _, token := range strings.Split(line, ":") { ch <- token } }).Map(func(line string) (string, string) { return line, line }) words.Join(words).Map(func(key, left, right string) { println(key, ":", left, ":", right) }).Run() }
func testBasicMapReduce() { flow.New().TextFile( "/etc/passwd", 2, ).Filter(func(line string) bool { // println("filter:", line) return !strings.HasPrefix(line, "#") }).Map(func(line string, ch chan string) { for _, token := range strings.Split(line, ":") { ch <- token } }).Map(func(key string) int { // println("map:", key) return 1 }).Reduce(func(x int, y int) int { // println("reduce:", x+y) return x + y }).Map(func(x int) { println("count:", x) }).Run() }
func testUnrolledStaticLoop() { ch := make(chan int) f1 := flow.New() left := f1.Channel(ch).Partition(2).Map(func(t int) (int, int) { return t, t * 2 }) for i := 0; i < 7; i++ { left = left.Map(func(x, y int) (int, int) { return x + 1, y + 1 }) } outChannel := make(chan struct { X, Y int }) left.AddOutput(outChannel) flow.Ready() var wg sync.WaitGroup goStart(&wg, func() { f1.Run() }) goStart(&wg, func() { for out := range outChannel { fmt.Printf("%d : %d\n", out.X, out.Y) } }) limit := 5 for i := 0; i < limit; i++ { ch <- i } close(ch) wg.Wait() }
"flag" "fmt" _ "github.com/chrislusf/glow/driver" "github.com/chrislusf/glow/flow" "github.com/wilseypa/rphash-golang/itemset" "github.com/wilseypa/rphash-golang/parse" "github.com/wilseypa/rphash-golang/reader" "github.com/wilseypa/rphash-golang/stream" "github.com/wilseypa/rphash-golang/utils" "os" "sync" "time" ) var ( f = flow.New() expectedDimensions = -1 numClusters = 6 ) type Centroid struct { C *itemset.Centroid } func goStart(wg *sync.WaitGroup, fn func()) { wg.Add(1) go func() { defer wg.Done() fn() }() }
"fmt" "io/ioutil" "strings" _ "github.com/chrislusf/glow/driver" "github.com/chrislusf/glow/flow" ) type WordSentence struct { Word string LineNumber int } var ( fileName = flag.String("file", "/etc/passwd", "name of a text file") f1 = flow.New() f2 = flow.New() ) func init() { f1.Source(func(out chan WordSentence) { bytes, err := ioutil.ReadFile(*fileName) if err != nil { println("Failed to read", *fileName) return } lines := strings.Split(string(bytes), "\n") for lineNumber, line := range lines { for _, word := range strings.Split(line, " ") { if word != "" { out <- WordSentence{word, lineNumber}
}

// URLTuple names a URL to fetch, its S3 object name, and a wait in
// milliseconds — presumably a pre-fetch delay; confirm against callers.
type URLTuple struct {
	gURL   string
	s3Name string
	msWait int
}

// statusTuple tallies pass and fail counts.
type statusTuple struct {
	pass int
	fail int
}

var (
	// fileName is the input text file, overridable with the -file flag.
	fileName = flag.String("file", "/etc/passwd", "name of a text file")
	f1       = flow.New()
	f2       = flow.New()
	f3       = flow.New()
	fw       = flow.New()
)

// Feed records per-feed processing state — NOTE(review): field meanings
// inferred from names only; verify against the code that fills them in.
type Feed struct {
	url           string
	status        int
	itemCount     int
	complete      bool
	itemsComplete bool
	index         int
}

type FeedItem struct {