Esempio n. 1
0
// MimeDetector is a filter stage named "MimeDetector" fed by the "UrlParser"
// stage. For every incoming URL it downloads the resource, sniffs the content
// type from the leading bytes of the body, and emits "<mime>-><url>".
//
// Once sf.Cancelled() reports true the remaining input is still drained, but
// nothing further is downloaded or emitted. The output channel is closed when
// the input channel is exhausted.
func MimeDetector(sf types.SetupFunction) {
	in, out := sf.SetName("MimeDetector").AsFilter("UrlParser").Build()
	defer close(out)

	// http.DetectContentType considers at most the first 512 bytes of data,
	// so one reusable buffer of that size is enough.
	buffer := make([]byte, 512)

	for urlBM := range in {
		if sf.Cancelled() {
			continue
		}
		url := ToString(urlBM)
		body := Download(url)
		if body == nil {
			// No body to inspect for this URL; skip it.
			continue
		}
		// The closure scopes the deferred Close to a single iteration.
		func(body io.ReadCloser) {
			defer body.Close()
			// A single Read may return fewer bytes than are available (for
			// example one network segment), which would make the sniffing
			// depend on how the data happened to arrive. ReadFull keeps
			// reading until the buffer is full or the body ends.
			n, err := io.ReadFull(body, buffer)

			// io.EOF means the body was empty; io.ErrUnexpectedEOF means it
			// was shorter than the buffer. Both are valid inputs for sniffing.
			if err == nil || err == io.EOF || err == io.ErrUnexpectedEOF {
				encoded := http.DetectContentType(buffer[:n]) + "->" + url
				out <- NewStringMarshaler(encoded)
			}
		}(body)
	}
}
Esempio n. 2
0
// UrlParser is a filter stage named "UrlParser" fed by the "UrlProducer"
// stage. For every incoming URL it downloads the page, extracts its links via
// fetchLinks, resolves each link against the page URL, and emits the result.
//
// Empty URLs, URLs that fail to parse, pages whose download yields no body,
// and links that fail to resolve are skipped. Once sf.Cancelled() reports
// true the remaining input is drained without further work.
func UrlParser(sf types.SetupFunction) {
	in, out := sf.SetName("UrlParser").AsFilter("UrlProducer").Build()
	defer close(out)

	for urlBM := range in {
		if sf.Cancelled() {
			continue
		}
		urlStr := ToString(urlBM)
		if urlStr == "" {
			// Nothing to fetch; this also keeps the last-byte check below
			// from indexing into an empty string and panicking.
			continue
		}
		// A URL with no file extension is treated as a directory and given a
		// trailing slash so links are resolved relative to it, not its parent.
		if path.Ext(urlStr) == "" && urlStr[len(urlStr)-1] != '/' {
			urlStr += "/"
		}
		url, err := urlParser.Parse(urlStr)
		if err != nil {
			continue
		}

		body := Download(url.String())
		if body == nil {
			// Same convention as MimeDetector: a nil body means there is
			// nothing to read for this URL.
			continue
		}
		// NOTE(review): body is not closed here; presumably fetchLinks takes
		// ownership of it — confirm against fetchLinks.
		for link := range fetchLinks(body) {
			joinedLink, err := url.Parse(link)
			if err != nil {
				continue
			}

			out <- NewStringMarshaler(joinedLink.String())
		}
	}
}
Esempio n. 3
0
// pathProducer is a producer stage named "PathProducer". It emits every
// element of argv, in order, wrapped by NewStringMarshaler, and closes its
// output channel when done.
func pathProducer(sf types.SetupFunction, argv []string) {
	out := sf.SetName("PathProducer").AsProducer().Build()
	defer close(out)

	for i := 0; i < len(argv); i++ {
		out <- NewStringMarshaler(argv[i])
	}
}
Esempio n. 4
0
// PathValidator is a filter stage named "PathValidator" fed by the stage
// called parent. It forwards, unchanged, every incoming item whose string
// form can be stat'ed without error, and drops the rest. The output channel
// is closed once the input is exhausted.
func PathValidator(sf types.SetupFunction, parent string) {
	in, out := sf.SetName("PathValidator").AsFilter(parent).Build()
	defer close(out)

	for candidate := range in {
		_, statErr := os.Stat(ToString(candidate))
		if statErr != nil {
			// Any stat failure (missing file, permissions, ...) drops the item.
			continue
		}
		out <- candidate
	}
}
Esempio n. 5
0
// urlLooper consumes the output of the "MimeSplitterHtml" stage and pushes
// the string form of each item onto feeder. Items that arrive while
// done.Get() reports true are discarded, but the input is still drained.
func urlLooper(sf types.SetupFunction, feeder chan<- interface{}, done types.AtomicBool) {
	in := sf.AsConsumer("MimeSplitterHtml").Build()

	for item := range in {
		if done.Get() {
			continue
		}
		feeder <- ToString(item)
	}
}
Esempio n. 6
0
// urlFeeder is a producer stage named "UrlFeeder". It re-emits every value
// received on feeder as a string marshaler until feeder is closed, then
// closes its own output. Values received while done.Get() reports true are
// discarded.
//
// Values on feeder must be strings; any other dynamic type makes the type
// assertion panic.
func urlFeeder(sf types.SetupFunction, feeder <-chan interface{}, done types.AtomicBool) {
	out := sf.SetName("UrlFeeder").AsProducer().Build()
	defer close(out)

	for item := range feeder {
		if done.Get() {
			continue
		}
		out <- NewStringMarshaler(item.(string))
	}
}
Esempio n. 7
0
// wordPrinter consumes the output of the FinalWordCounter stage and prints
// each key/value pair of every received map with the builtin println.
// done is closed when the function returns, i.e. after the input channel
// has been exhausted.
func wordPrinter(sf types.SetupFunction, done chan struct{}) {
	defer close(done)
	in := sf.AsConsumer("github.com/apoydence/hydra/examples/wordCount.FinalWordCounter").Build()

	for msg := range in {
		counts := ToMap(msg)
		// Map iteration order is unspecified, so the print order is too.
		for word, count := range counts {
			println(word, count)
		}
	}
}
Esempio n. 8
0
// WordCounter is a filter stage fed by the SymbolRemover stage. It passes
// the string form of every incoming item to incMap with an increment of 1,
// and, once the input is exhausted, emits the accumulated map as a single
// word-count marshaler before closing its output.
func WordCounter(sf types.SetupFunction) {
	in, out := sf.AsFilter("github.com/apoydence/hydra/examples/wordCount.SymbolRemover").Build()
	defer close(out)

	counts := map[string]uint32{}
	for item := range in {
		incMap(ToString(item), 1, counts)
	}

	// Exactly one message is produced, after all input has been seen.
	out <- NewWordCountMarshaler(counts)
}
Esempio n. 9
0
// FinalWordCounter is a filter stage fed by the WordCounter stage. It folds
// every incoming word-count map into one combined map by passing each
// key/value pair to incMap, and emits the combined map as a single message
// once the input is exhausted, then closes its output.
func FinalWordCounter(sf types.SetupFunction) {
	in, out := sf.AsFilter("github.com/apoydence/hydra/examples/wordCount.WordCounter").Build()
	defer close(out)

	totals := map[string]uint32{}

	for msg := range in {
		partial := ToMap(msg)
		for word, count := range partial {
			incMap(word, count, totals)
		}
	}

	out <- NewWordCountMarshaler(totals)
}
Esempio n. 10
0
// MimeSplitterHtml is a filter stage named "MimeSplitterHtml" fed by the
// "MimeDetector" stage. Each incoming item is decoded with decodeMimeUrl
// into a MIME type and a URL; the URL is forwarded only when the MIME type
// contains the substring "html".
//
// Once sf.Cancelled() reports true the remaining input is drained without
// emitting anything.
func MimeSplitterHtml(sf types.SetupFunction) {
	in, out := sf.SetName("MimeSplitterHtml").AsFilter("MimeDetector").Build()
	defer close(out)
	for item := range in {
		if sf.Cancelled() {
			continue
		}
		mime, target := decodeMimeUrl(ToString(item))
		if !strings.Contains(mime, "html") {
			continue
		}
		out <- NewStringMarshaler(target)
	}
}
Esempio n. 11
0
// SymbolRemover is a filter stage fed by the WordExtractor stage. It
// lower-cases each incoming word, keeps only the bytes '0'-'9' and 'a'-'z',
// and emits the result. Words with nothing left after filtering are dropped.
func SymbolRemover(sf types.SetupFunction) {
	in, out := sf.AsFilter("github.com/apoydence/hydra/examples/wordCount.WordExtractor").Build()
	defer close(out)

	for item := range in {
		lowered := strings.ToLower(ToString(item))
		kept := make([]byte, 0)
		// Filtering is byte-wise: every byte outside the two ASCII ranges,
		// including all bytes of multi-byte UTF-8 sequences, is discarded.
		for i := 0; i < len(lowered); i++ {
			c := lowered[i]
			isDigit := c >= '0' && c <= '9'
			isLower := c >= 'a' && c <= 'z'
			if isDigit || isLower {
				kept = append(kept, c)
			}
		}
		if len(kept) == 0 {
			continue
		}
		out <- NewStringMarshaler(string(kept))
	}
}
Esempio n. 12
0
// WordExtractor is a filter stage fed by the "PathValidator" stage. It opens
// the file named by each incoming item, splits its contents into
// whitespace-separated words, and emits every word. Files that cannot be
// opened are skipped. The output channel is closed once the input is
// exhausted.
func WordExtractor(sf types.SetupFunction) {
	in, out := sf.AsFilter("PathValidator").Build()
	defer close(out)

	for pathBM := range in {
		f, err := os.Open(ToString(pathBM))
		if err != nil {
			continue
		}

		scanner := bufio.NewScanner(f)
		scanner.Split(bufio.ScanWords)
		for scanner.Scan() {
			out <- NewStringMarshaler(scanner.Text())
		}
		// Scan errors are deliberately not reported: extraction is
		// best-effort and whatever was read before the error was emitted.

		// Close per iteration rather than with defer, so file descriptors
		// are released as each file is finished instead of piling up until
		// the stage returns. The file was opened read-only, so the Close
		// error carries no useful information here.
		f.Close()
	}
}
Esempio n. 13
0
// UrlProducer is a filter stage named "UrlProducer" fed by the stage called
// parent. It de-duplicates the incoming URLs: the first occurrence of each
// URL string is forwarded and later occurrences are dropped.
//
// Once sf.Cancelled() reports true the remaining input is drained without
// being recorded or emitted.
func UrlProducer(sf types.SetupFunction, parent string) {
	in, out := sf.SetName("UrlProducer").AsFilter(parent).Build()
	defer close(out)

	// Set of URLs already forwarded; only key presence matters.
	seen := map[string]struct{}{}

	for item := range in {
		if sf.Cancelled() {
			continue
		}
		u := ToString(item)
		if _, dup := seen[u]; dup {
			continue
		}
		seen[u] = struct{}{}
		out <- NewStringMarshaler(u)
	}
}
Esempio n. 14
0
// textDownloader consumes the output of the "MimeSplitterText" stage and
// saves each URL under the download directory, using the last path element
// of the URL as the file name. The value returned by saveToFile is added to
// a running total, which is printed after each download.
//
// Once the total reaches mb*1024*1024, closer is invoked for every further
// item received and that item is skipped; the input is still drained.
// done is closed after the input channel is exhausted.
func textDownloader(sf types.SetupFunction, download string, mb int, done chan struct{}, closer func()) {
	in := sf.AsConsumer("MimeSplitterText").Build()

	limit := int64(mb) * 1024 * 1024
	var total int64

	for item := range in {
		if total >= limit {
			// NOTE(review): closer runs once per item past the limit, not
			// just once — confirm the caller's closer tolerates repeat calls.
			closer()
			continue
		}

		u := ToString(item)
		name := path.Base(u)
		println("download", u, name)
		total += saveToFile(Download(u), path.Join(download, name))
		println("Size", total)
	}

	close(done)
}