Пример #1
0
func main() {
	flag.Parse()

	f := flow.New()
	hdfs.Source(
		f,
		"hdfs://localhost:9000/etc",
		8,
	).Filter(func(line string) bool {
		// println("filter:", line)
		return !strings.HasPrefix(line, "#")
	}).Map(func(line string, ch chan string) {
		for _, token := range strings.Split(line, ":") {
			ch <- token
		}
	}).Map(func(key string) int {
		// println("map:", key)
		return 1
	}).Reduce(func(x int, y int) int {
		// println("reduce:", x+y)
		return x + y
	}).Map(func(x int) {
		println("count:", x)
	}).Run()
}
Пример #2
0
// testJoin builds per-word counts for two text files and joins them on the
// word, printing "word : leftCount : rightCount" for every joined key.
func testJoin() {
	// The pattern is a compile-time constant, so MustCompile is the idiomatic
	// replacement for Compile followed by a manual panic.
	separators := regexp.MustCompile("[^A-Za-z0-9]+")

	// tokenizer collapses every run of non-alphanumeric characters to "-",
	// splits on "-", and emits each piece lower-cased. Empty pieces (for
	// example from a leading separator) are emitted as well.
	tokenizer := func(line string, ch chan string) {
		line = separators.ReplaceAllString(line, "-")
		for _, token := range strings.Split(line, "-") {
			ch <- strings.ToLower(token)
		}
	}
	// withOne pairs a word with an initial count of one.
	withOne := func(word string) (string, int) {
		return word, 1
	}
	// add merges two counts for the same word.
	add := func(x, y int) int {
		return x + y
	}

	f1 := flow.New()

	leftWords := f1.TextFile(
		"/etc/passwd", 3,
	).Map(tokenizer).Map(withOne).Sort(nil).LocalReduceByKey(add)

	rightWords := f1.TextFile(
		"word_count.go", 3,
	).Map(tokenizer).Map(withOne).Sort(nil).LocalReduceByKey(add)

	leftWords.Join(rightWords).Map(func(key string, left, right int) {
		println(key, ":", left, ":", right)
	}).Run()
}
Пример #3
0
// init wires up flow f1: users and posts are streamed out of MongoDB, keyed
// by e-mail address, co-grouped, filtered down to users whose LastSeenTime is
// more than a year old, and reported together with the combined size of their
// post attachments and inlines. The flow is only defined here; it is not run.
func init() {
	f1 = flow.New()

	// Stream every user document and key it by e-mail.
	users := f1.Source(func(out chan User) {
		iterate("mongodb://127.0.0.1", "example", "users", func(iter *mgo.Iter) {
			var u User
			for iter.Next(&u) {
				out <- u
			}
		})
	}, 1).Map(func(u User) (string, User) {
		return u.Email, u
	}).Partition(3)

	// Stream every post document and key it by e-mail.
	posts := f1.Source(func(out chan Post) {
		iterate("mongodb://127.0.0.1", "example", "posts", func(iter *mgo.Iter) {
			var p Post
			for iter.Next(&p) {
				out <- p
			}
		})
	}, 1).Map(func(p Post) (string, Post) {
		return p.Email, p
	}).Partition(3)

	users.CoGroup(posts).Map(func(email string, owners []User, written []Post) (User, []Post) {
		// Posts without a matching user are paired with a zero-value User.
		var owner User
		if len(owners) > 0 {
			owner = owners[0]
		}
		return owner, written
	}).Filter(func(owner User, written []Post) bool {
		oneYearAgo := time.Now().AddDate(-1, 0, 0)
		return owner.LastSeenTime.Before(oneYearAgo)
	}).Map(func(owner User, written []Post) {
		sizeSum := 0
		for i := range written {
			for _, a := range written[i].Attachments {
				sizeSum += a.Size
			}
			for _, a := range written[i].Inlines {
				sizeSum += a.Size
			}
		}
		// Prefix the report with the first post's id and e-mail when there is one.
		if len(written) > 0 {
			fmt.Print(written[0].Id, " ", written[0].Email)
		}
		fmt.Println(" user", owner.Email, "posts", len(written), "bytes", sizeSum, "lastSeen", owner.LastSeenTime)
	})
}
Пример #4
0
// testInputOutputChannels feeds integers into a flow through a Go channel and
// reads results back through two output channels: one carrying the raw source
// values and one carrying the join of the doubled and tripled streams.
func testInputOutputChannels() {
	input := make(chan int)
	fc := flow.New()
	numbers := fc.Channel(input)

	doubled := numbers.Map(func(n int) (int, int) {
		return n, n * 2
	})
	tripled := numbers.Map(func(n int) (int, int) {
		return n, n * 3
	})

	joined := make(chan struct {
		X, Y, Z int
	})
	doubled.Join(tripled).AddOutput(joined)

	echoed := make(chan int)
	numbers.AddOutput(echoed)

	flow.Ready()

	var wg sync.WaitGroup

	// Run the flow and drain both outputs concurrently with the producer below.
	goStart(&wg, func() {
		fc.Run()
	})
	goStart(&wg, func() {
		for v := range echoed {
			fmt.Printf("source %d \n", v)
		}
	})
	goStart(&wg, func() {
		// Only X and Y are printed; Z is received but not shown.
		for row := range joined {
			fmt.Printf("%d : %d\n", row.X, row.Y)
		}
	})

	// Send each of 0..4 twice, then signal end of input.
	for i := 0; i < 5; i++ {
		for rep := 0; rep < 2; rep++ {
			input <- i
		}
	}
	close(input)

	wg.Wait()
}
Пример #5
0
// main counts the ":"-separated tokens on the non-comment lines of
// /etc/passwd and prints the total as "count: N".
func main() {
	lines := flow.New().TextFile("/etc/passwd", 3)

	// Drop lines that begin with "#".
	data := lines.Filter(func(l string) bool {
		return !strings.HasPrefix(l, "#")
	})

	// Emit every ":"-separated field of each line.
	tokens := data.Map(func(l string, out chan string) {
		fields := strings.Split(l, ":")
		for i := 0; i < len(fields); i++ {
			out <- fields[i]
		}
	})

	// Turn each token into a 1 and add the ones up.
	ones := tokens.Map(func(string) int { return 1 })
	total := ones.Reduce(func(a, b int) int { return a + b })

	total.Map(func(n int) {
		println("count:", n)
	}).Run()
}
Пример #6
0
// testPartitionAndSort reads /etc/hosts, partitions it, passes each line
// through unchanged, sorts the lines in ascending byte order and prints them.
func testPartitionAndSort() {
	identity := func(line string) string { return line }
	// ascending reports whether a orders strictly before b.
	ascending := func(a, b string) bool { return strings.Compare(a, b) < 0 }
	show := func(line string) { println(line) }

	flow.New().
		TextFile("/etc/hosts", 7).
		Partition(2).
		Map(identity).
		Sort(ascending).
		Map(show).
		Run()
}
Пример #7
0
// testSelfJoin tokenizes the non-comment lines of /etc/passwd on ":", keys
// every token by itself, joins the resulting dataset with itself and prints
// "key : left : right" for each joined row.
func testSelfJoin() {
	notComment := func(line string) bool {
		return !strings.HasPrefix(line, "#")
	}
	splitOnColon := func(line string, out chan string) {
		parts := strings.Split(line, ":")
		for i := 0; i < len(parts); i++ {
			out <- parts[i]
		}
	}
	// keyBySelf uses the token as both key and value.
	keyBySelf := func(token string) (string, string) {
		return token, token
	}

	words := flow.New().
		TextFile("/etc/passwd", 3).
		Filter(notComment).
		Map(splitOnColon).
		Map(keyBySelf)

	joined := words.Join(words)
	joined.Map(func(key, left, right string) {
		println(key, ":", left, ":", right)
	}).Run()
}
Пример #8
0
// testBasicMapReduce runs a minimal filter/map/reduce pipeline over
// /etc/passwd: comment lines are skipped, the rest are split on ":", and the
// number of resulting tokens is printed as "count: N".
func testBasicMapReduce() {
	var (
		// skipComments rejects lines starting with "#".
		skipComments = func(line string) bool {
			return !strings.HasPrefix(line, "#")
		}
		// tokenize emits each ":"-separated field of a line.
		tokenize = func(line string, out chan string) {
			for _, field := range strings.Split(line, ":") {
				out <- field
			}
		}
		// toOne replaces a token with the constant 1 so tokens can be summed.
		toOne = func(string) int { return 1 }
		plus  = func(a, b int) int { return a + b }
		print = func(total int) { println("count:", total) }
	)

	flow.New().
		TextFile("/etc/passwd", 2).
		Filter(skipComments).
		Map(tokenize).
		Map(toOne).
		Reduce(plus).
		Map(print).
		Run()
}
Пример #9
0
// testUnrolledStaticLoop builds a flow whose pipeline is extended by a fixed
// number of identical Map steps in a loop, feeds it 0..4 through a channel and
// prints every (X, Y) pair that comes out.
func testUnrolledStaticLoop() {
	input := make(chan int)
	fc := flow.New()

	pairs := fc.Channel(input).Partition(2).Map(func(n int) (int, int) {
		return n, n * 2
	})

	// Append seven steps that each add one to both elements of the pair.
	bump := func(x, y int) (int, int) {
		return x + 1, y + 1
	}
	for step := 0; step < 7; step++ {
		pairs = pairs.Map(bump)
	}

	results := make(chan struct {
		X, Y int
	})
	pairs.AddOutput(results)

	flow.Ready()

	var wg sync.WaitGroup

	// Run the flow and drain its output concurrently with the producer below.
	goStart(&wg, func() {
		fc.Run()
	})
	goStart(&wg, func() {
		for row := range results {
			fmt.Printf("%d : %d\n", row.X, row.Y)
		}
	})

	// Feed 0..4, then signal end of input.
	for n := 0; n < 5; n++ {
		input <- n
	}
	close(input)

	wg.Wait()
}
Пример #10
0
	"flag"
	"fmt"
	_ "github.com/chrislusf/glow/driver"
	"github.com/chrislusf/glow/flow"
	"github.com/wilseypa/rphash-golang/itemset"
	"github.com/wilseypa/rphash-golang/parse"
	"github.com/wilseypa/rphash-golang/reader"
	"github.com/wilseypa/rphash-golang/stream"
	"github.com/wilseypa/rphash-golang/utils"
	"os"
	"sync"
	"time"
)

// Package-level state shared by the rest of this file.
var (
	// f is the flow context created once at package initialisation.
	f                  = flow.New()
	// expectedDimensions starts at -1.
	// NOTE(review): presumably a "not yet known" sentinel that is replaced
	// once the input is inspected - confirm against its users.
	expectedDimensions = -1
	// numClusters defaults to 6.
	// NOTE(review): presumably the number of clusters requested from the
	// clustering step - confirm against its users.
	numClusters        = 6
)

// Centroid is a local struct type holding a pointer to an rphash
// itemset.Centroid.
// NOTE(review): presumably exists so centroids can travel through flow
// steps as a named, exported-field type - confirm against its users.
type Centroid struct {
	// C is the wrapped centroid.
	C *itemset.Centroid
}

// goStart runs fn on a new goroutine that is tracked by wg: the counter is
// incremented before the goroutine is launched and decremented when fn
// returns, so a later wg.Wait() blocks until fn has finished.
func goStart(wg *sync.WaitGroup, fn func()) {
	tracked := func() {
		// Deferred so the counter is released even if fn panics.
		defer wg.Done()
		fn()
	}

	wg.Add(1)
	go tracked()
}
Пример #11
0
	"fmt"
	"io/ioutil"
	"strings"

	_ "github.com/chrislusf/glow/driver"
	"github.com/chrislusf/glow/flow"
)

// WordSentence records one word together with the line it was found on.
type WordSentence struct {
	// Word is a single non-empty, space-separated token from the input file.
	Word       string
	// LineNumber is the zero-based index of the line that contained Word.
	LineNumber int
}

// Command-line flags and the flow contexts used by this file.
var (
	// fileName is the -file flag: the text file to read, defaulting to
	// /etc/passwd.
	fileName = flag.String("file", "/etc/passwd", "name of a text file")
	// f1 and f2 are two independent flow contexts created at package
	// initialisation.
	f1       = flow.New()
	f2       = flow.New()
)

func init() {
	f1.Source(func(out chan WordSentence) {
		bytes, err := ioutil.ReadFile(*fileName)
		if err != nil {
			println("Failed to read", *fileName)
			return
		}
		lines := strings.Split(string(bytes), "\n")
		for lineNumber, line := range lines {
			for _, word := range strings.Split(line, " ") {
				if word != "" {
					out <- WordSentence{word, lineNumber}
Пример #12
0
}

// URLTuple groups a URL with a storage name and a wait time.
// NOTE(review): field meanings below are inferred from the names only -
// confirm against the code that fills them in.
type URLTuple struct {
	// gURL is presumably the source URL to fetch.
	gURL   string
	// s3Name is presumably the object name to use in S3.
	s3Name string
	// msWait is presumably a delay in milliseconds.
	msWait int
}

// statusTuple holds a pair of integer tallies.
// NOTE(review): presumably counts of successful and failed operations -
// confirm against the code that updates them.
type statusTuple struct {
	pass int
	fail int
}

// Command-line flags and the flow contexts used by this file.
var (
	// fileName is the -file flag: the text file to read, defaulting to
	// /etc/passwd.
	fileName = flag.String("file", "/etc/passwd", "name of a text file")
	// f1, f2, f3 and fw are four independent flow contexts created at
	// package initialisation.
	f1       = flow.New()
	f2       = flow.New()
	f3       = flow.New()
	fw       = flow.New()
)

// Feed tracks the processing state of a single feed.
// NOTE(review): field meanings below are inferred from the names only -
// confirm against the code that reads and writes them.
type Feed struct {
	// url is presumably the feed's address.
	url           string
	// status is presumably a result or HTTP status code for the fetch.
	status        int
	// itemCount is presumably the number of items found in the feed.
	itemCount     int
	// complete presumably marks the feed itself as finished.
	complete      bool
	// itemsComplete presumably marks all of the feed's items as finished.
	itemsComplete bool
	// index is presumably the feed's position in a containing list.
	index         int
}

type FeedItem struct {