示例#1
0
// MimeDetector is a pipeline stage that sniffs the content type of every URL
// it receives and emits one "<mime>-><url>" string per successfully read body.
//
// It registers itself under the name "MimeDetector" as a filter attached to
// the stage named "UrlParser". For each incoming URL the resource is
// downloaded, up to the first 512 bytes of the body are read, and those bytes
// are handed to http.DetectContentType. URLs for which Download returns nil,
// or whose body fails with a real read error, are skipped. Once sf reports
// cancellation the remaining input is consumed without being processed. The
// output channel is closed when the input is exhausted.
func MimeDetector(sf types.SetupFunction) {
	in, out := sf.SetName("MimeDetector").AsFilter("UrlParser").Build()
	defer close(out)

	// http.DetectContentType considers at most the first 512 bytes, so a
	// single reusable buffer of that size serves every URL.
	buffer := make([]byte, 512)

	for urlBM := range in {
		if sf.Cancelled() {
			continue
		}
		url := ToString(urlBM)
		body := Download(url)
		if body == nil {
			continue
		}
		// The closure scopes the deferred Close to this iteration instead of
		// piling up open bodies until the function returns.
		func(body io.ReadCloser) {
			defer body.Close()

			// A single Read may return far fewer bytes than are available
			// (typical for network streams), which would make the sniffing
			// work on a truncated prefix. ReadFull keeps reading until the
			// buffer is full or the body ends.
			n, err := io.ReadFull(body, buffer)

			switch err {
			case nil, io.EOF, io.ErrUnexpectedEOF:
				// io.EOF: empty body; io.ErrUnexpectedEOF: body shorter than
				// the buffer. Both are still classified from buffer[:n].
				encoded := http.DetectContentType(buffer[:n]) + "->" + url
				out <- NewStringMarshaler(encoded)
			}
		}(body)
	}
}
示例#2
0
// UrlParser is a pipeline stage that downloads each incoming URL and emits
// every link extracted from it, resolved relative to that URL.
//
// It registers itself under the name "UrlParser" as a filter attached to the
// stage named "UrlProducer". Empty inputs, URLs that fail to parse, URLs for
// which Download returns nil, and links that fail to resolve are skipped.
// Once sf reports cancellation the remaining input is consumed without being
// processed. The output channel is closed when the input is exhausted.
func UrlParser(sf types.SetupFunction) {
	in, out := sf.SetName("UrlParser").AsFilter("UrlProducer").Build()
	defer close(out)

	for urlBM := range in {
		if sf.Cancelled() {
			continue
		}
		urlStr := ToString(urlBM)
		if urlStr == "" {
			// Nothing to fetch; this also protects the last-byte index below
			// from panicking on an empty string.
			continue
		}
		// A path with no extension and no trailing slash gets a slash
		// appended before parsing, so it is handled like a directory.
		if path.Ext(urlStr) == "" && urlStr[len(urlStr)-1] != '/' {
			urlStr += "/"
		}
		// NOTE(review): urlParser is presumably an import alias for net/url;
		// confirm against the file's import block.
		base, err := urlParser.Parse(urlStr)
		if err != nil {
			continue
		}

		body := Download(base.String())
		if body == nil {
			// Same guard MimeDetector applies to a failed download.
			continue
		}
		// NOTE(review): body is never closed in this function. Verify that
		// fetchLinks takes ownership and closes it; otherwise it leaks.
		for link := range fetchLinks(body) {
			resolved, err := base.Parse(link)
			if err != nil {
				continue
			}

			out <- NewStringMarshaler(resolved.String())
		}
	}
}
示例#3
0
// pathProducer is a pipeline source registered under the name "PathProducer".
// It emits every element of argv, in order, wrapped by NewStringMarshaler,
// and closes the output channel once all of them have been sent.
func pathProducer(sf types.SetupFunction, argv []string) {
	out := sf.SetName("PathProducer").AsProducer().Build()
	defer close(out)

	for i := 0; i < len(argv); i++ {
		out <- NewStringMarshaler(argv[i])
	}
}
示例#4
0
// PathValidator is a pipeline stage registered under the name "PathValidator"
// and attached as a filter to the stage named by parent. It forwards an
// incoming item unchanged when os.Stat succeeds on its string form and drops
// it otherwise. The output channel is closed when the input is exhausted.
func PathValidator(sf types.SetupFunction, parent string) {
	in, out := sf.SetName("PathValidator").AsFilter(parent).Build()
	defer close(out)

	for item := range in {
		_, statErr := os.Stat(ToString(item))
		if statErr != nil {
			// Any Stat failure (missing file, permission, ...) drops the item.
			continue
		}
		out <- item
	}
}
示例#5
0
// urlFeeder is a pipeline source registered under the name "UrlFeeder". It
// relays values received on feeder, asserted to be strings, to the output
// channel for as long as done reads false. After done becomes true it keeps
// receiving from feeder but discards the values. The output channel is
// closed once feeder is closed and drained.
//
// A non-string value received while done is false makes the type assertion
// panic.
func urlFeeder(sf types.SetupFunction, feeder <-chan interface{}, done types.AtomicBool) {
	out := sf.SetName("UrlFeeder").AsProducer().Build()
	defer close(out)

	for item := range feeder {
		if done.Get() {
			// Keep consuming so the loop ends only when feeder is closed.
			continue
		}
		out <- NewStringMarshaler(item.(string))
	}
}
示例#6
0
// MimeSplitterHtml is a pipeline stage registered under the name
// "MimeSplitterHtml" and attached as a filter to the stage named
// "MimeDetector". Each incoming string is split by decodeMimeUrl into a MIME
// type and a URL; the URL is forwarded only when the MIME type contains
// "html". Once sf reports cancellation the remaining input is consumed
// without being processed. The output channel is closed when the input is
// exhausted.
func MimeSplitterHtml(sf types.SetupFunction) {
	in, out := sf.SetName("MimeSplitterHtml").AsFilter("MimeDetector").Build()
	defer close(out)

	for msg := range in {
		if sf.Cancelled() {
			continue
		}

		mimeType, target := decodeMimeUrl(ToString(msg))
		if !strings.Contains(mimeType, "html") {
			continue
		}
		out <- NewStringMarshaler(target)
	}
}
示例#7
0
// UrlProducer is a pipeline stage registered under the name "UrlProducer" and
// attached as a filter to the stage named by parent. It de-duplicates its
// input: a URL string is forwarded the first time it is seen and dropped on
// every later occurrence. Once sf reports cancellation the remaining input is
// consumed without being processed or recorded. The output channel is closed
// when the input is exhausted.
func UrlProducer(sf types.SetupFunction, parent string) {
	in, out := sf.SetName("UrlProducer").AsFilter(parent).Build()
	defer close(out)

	// Set of URLs already forwarded; it grows for the lifetime of the stage.
	seen := map[string]struct{}{}

	for msg := range in {
		if sf.Cancelled() {
			continue
		}

		candidate := ToString(msg)
		if _, dup := seen[candidate]; dup {
			continue
		}
		seen[candidate] = struct{}{}
		out <- NewStringMarshaler(candidate)
	}
}