예제 #1
0
파일: main.go 프로젝트: bbezuk/scrapper
// ParseImages collects image URLs from the page rooted at node.
//
// It looks up the first element matching "#bigpic" and, when that element
// carries a non-empty src attribute, emits two URLs for it: the src as given
// and a variant with the first "large" replaced by "thickbox". Both are
// prefixed with the hard-coded site base URL. The URLs that ParseThumbs
// produces for the first "#thumbs_list_frame" match are appended afterwards.
//
// The returned slice is nil when nothing was found.
func ParseImages(node *h5.Node) (list []string) {
	coreURL := "http://keindl-sport.hr"

	if matches := transform.NewSelectorQuery("#bigpic").Apply(node); len(matches) > 0 {
		var baseSrc string
		for _, attr := range matches[0].Attr {
			if attr.Name == "src" {
				baseSrc = attr.Value
			}
		}

		// Without a src there is nothing to link to; emitting the bare base
		// URL twice would only add bogus entries to the result.
		if baseSrc != "" {
			largeSrc := strings.Replace(baseSrc, "large", "thickbox", 1)
			list = append(list,
				fmt.Sprintf("%s%s", coreURL, baseSrc),
				fmt.Sprintf("%s%s", coreURL, largeSrc),
			)
		}
	}

	if frames := transform.NewSelectorQuery("#thumbs_list_frame").Apply(node); len(frames) > 0 {
		list = append(list, ParseThumbs(frames[0], &coreURL)...)
	}

	return list
}
예제 #2
0
파일: main.go 프로젝트: bbezuk/scrapper
// ParseDescriptions gathers description strings from the first element
// matching "#idTab1".
//
// For every child of that element the first "span" match is inspected: a span
// with more than one child is handed to ExtractSpecialDescription, and a span
// with exactly one child contributes that child's Data(). Children without a
// span, or whose span has no children, are skipped.
//
// The returned slice is nil when "#idTab1" is absent or nothing was collected.
func ParseDescriptions(node *h5.Node) (list []string) {
	tabs := transform.NewSelectorQuery("#idTab1").Apply(node)
	if len(tabs) == 0 {
		return nil
	}

	for _, chapter := range tabs[0].Children {
		spans := transform.NewSelectorQuery("span").Apply(chapter)
		if len(spans) == 0 {
			continue
		}

		parts := spans[0].Children
		switch {
		case len(parts) == 0:
			// An empty span has no text; indexing parts[0] would panic.
			continue
		case len(parts) > 1:
			list = append(list, ExtractSpecialDescription(parts))
		default:
			list = append(list, parts[0].Data())
		}
	}

	return list
}
예제 #3
0
파일: eztv.go 프로젝트: sunclx/magnetHoover
// eztvSnipp extracts an identifier and a link from the node r.
//
// The "td.forum_thread_post" cells under r are selected. The Data() of the
// first child of the first anchor in the second cell becomes the identifier.
// The anchors of the third cell are passed through eztvLinkHref in order until
// one yields a value whose part before the first ":" is "magnet"; if none
// does, the value obtained from the last anchor is kept.
//
// The result always has exactly two elements, {id, link}. Either element is
// the empty string when the corresponding markup is missing, so unexpected
// input no longer causes an out-of-range panic.
func eztvSnipp(r *h5.Node) []string {
	var id, u string

	cells := transform.NewSelectorQuery("td.forum_thread_post").Apply(r)

	if len(cells) > 1 {
		anchors := transform.NewSelectorQuery("a").Apply(cells[1])
		if len(anchors) > 0 && len(anchors[0].Children) > 0 {
			id = anchors[0].Children[0].Data()
		}
	}

	if len(cells) > 2 {
		for _, l := range transform.NewSelectorQuery("a").Apply(cells[2]) {
			u = eztvLinkHref(l)
			// Only the scheme matters, so split once instead of on every colon.
			if strings.SplitN(u, ":", 2)[0] == "magnet" {
				break
			}
		}
	}

	return []string{id, u}
}
예제 #4
0
파일: main.go 프로젝트: bbezuk/scrapper
// ParseExtras builds one Selectable per child of the "#attributes" element
// that itself has children. The name comes from ExtractExtraName and the
// options from ExtractExtraVals.
//
// It returns nil unless exactly one element matches "#attributes".
func ParseExtras(node *h5.Node) (list []Selectable) {
	matches := transform.NewSelectorQuery("#attributes").Apply(node)
	if len(matches) != 1 {
		return list
	}

	for _, group := range matches[0].Children {
		// Childless entries are skipped.
		if len(group.Children) > 0 {
			list = append(list, Selectable{
				Name:    ExtractExtraName(group),
				Options: ExtractExtraVals(group),
			})
		}
	}

	return list
}
예제 #5
0
파일: main.go 프로젝트: bbezuk/scrapper
// ParseFeatures walks a fixed chain of selectors starting at node, descending
// into the first match at each step, and then feeds every match of the final
// step ("tr") to ExtractFeature to build a key/value map.
//
// It returns nil as soon as any step of the chain matches nothing.
func ParseFeatures(node *h5.Node) (list map[string]string) {
	path := []string{".product_short_features_list nuc_short_festures_list", "table", "tbody", "tr"}

	current := node
	var matches []*h5.Node
	for i := 0; i < len(path); i++ {
		matches = transform.NewSelectorQuery(path[i]).Apply(current)
		if len(matches) == 0 {
			return nil
		}
		current = matches[0]
	}

	// matches now holds the results of the last selector in the chain.
	list = make(map[string]string)
	for _, row := range matches {
		name, value := ExtractFeature(row)
		list[name] = value
	}

	return list
}
예제 #6
0
파일: main.go 프로젝트: bbezuk/scrapper
// ExtractDetail reads one key/value pair from node.
//
// The key is taken from the first child of the first ".product_feature_name"
// match and the value from the first child of the first
// ".product_feature_value" match. Both are HTML-unescaped and stripped of
// surrounding spaces, colons, tabs and line breaks.
//
// A part whose element is missing, or whose element has no children, is
// returned as the empty string instead of causing an out-of-range panic.
func ExtractDetail(node *h5.Node) (key, val string) {
	const cutset = " :\t\n\r"

	// text returns the cleaned text for the first match of selector, or ""
	// when there is no match or the match has no children.
	text := func(selector string) string {
		found := transform.NewSelectorQuery(selector).Apply(node)
		if len(found) == 0 || len(found[0].Children) == 0 {
			return ""
		}
		return strings.Trim(html.UnescapeString(found[0].Children[0].Data()), cutset)
	}

	key = text(".product_feature_name")
	val = text(".product_feature_value")

	return key, val
}
예제 #7
0
파일: main.go 프로젝트: bbezuk/scrapper
// ParseDetails builds a key/value map from the "tr" elements found under the
// first element matching "#idTab2", using ExtractDetail on each row.
//
// The returned map is never nil; it is empty when "#idTab2" is absent.
func ParseDetails(node *h5.Node) (list map[string]string) {
	list = make(map[string]string)

	tabs := transform.NewSelectorQuery("#idTab2").Apply(node)
	if len(tabs) == 0 {
		return list
	}

	rows := transform.NewSelectorQuery("tr").Apply(tabs[0])
	for i := range rows {
		name, value := ExtractDetail(rows[i])
		list[name] = value
	}

	return list
}
예제 #8
0
파일: main.go 프로젝트: bbezuk/scrapper
// ParseCategory returns the HTML-unescaped Data() of the node two levels below
// the first element matching ".navigation_end" (first child of its first
// child).
//
// It returns "" when there is no match or when either level has no children,
// rather than panicking on the missing node.
func ParseCategory(node *h5.Node) string {
	matches := transform.NewSelectorQuery(".navigation_end").Apply(node)
	if len(matches) == 0 {
		return ""
	}

	// Descend through two first-child links, bailing out if either is absent.
	current := matches[0]
	for depth := 0; depth < 2; depth++ {
		if len(current.Children) == 0 {
			return ""
		}
		current = current.Children[0]
	}

	return html.UnescapeString(current.Data())
}
예제 #9
0
파일: main.go 프로젝트: bbezuk/scrapper
// CheckForAvailable reports whether node contains an availability marker and
// what that marker says.
//
// The first result is true when exactly one ".not_available" element or,
// failing that, exactly one ".available" element is found under node. The
// second result is true only in the ".available" case.
func CheckForAvailable(node *h5.Node) (bool, bool) {
	if hits := transform.NewSelectorQuery(".not_available").Apply(node); len(hits) == 1 {
		return true, false
	}

	if hits := transform.NewSelectorQuery(".available").Apply(node); len(hits) == 1 {
		return true, true
	}

	return false, false
}
예제 #10
0
파일: main.go 프로젝트: bbezuk/scrapper
// CheckForEditable looks for a single ".editable" element under the first
// node of the given slice.
//
// When exactly one such element exists it is returned (as a one-element
// slice) together with true. Otherwise the input slice is returned unchanged
// together with false. An empty input slice is returned as-is with false
// instead of panicking on node[0].
func CheckForEditable(node []*h5.Node) ([]*h5.Node, bool) {
	if len(node) == 0 {
		return node, false
	}

	result := transform.NewSelectorQuery(".editable").Apply(node[0])
	if len(result) == 1 {
		return result, true
	}

	return node, false
}
예제 #11
0
파일: main.go 프로젝트: bbezuk/scrapper
// ExtractFeature reads one feature name/value pair from node.
//
// The key is the HTML-unescaped Data() of the first child of the first
// ".feature_name" match. The value is derived from the first ".feature_value"
// match, in this order:
//
//  1. if CheckForEditable finds an editable element, the raw Data() of that
//     element's first child;
//  2. if CheckForAvailable finds an availability marker, "Dostupno" or
//     "Nedostupno";
//  3. otherwise the HTML-unescaped, whitespace-trimmed Data() of the value
//     element's first child.
//
// Any part whose element is missing or has no children is returned as the
// empty string instead of causing an out-of-range panic.
func ExtractFeature(node *h5.Node) (key, val string) {
	names := transform.NewSelectorQuery(".feature_name").Apply(node)
	if len(names) > 0 && len(names[0].Children) > 0 {
		key = html.UnescapeString(names[0].Children[0].Data())
	}

	values := transform.NewSelectorQuery(".feature_value").Apply(node)
	if len(values) == 0 {
		// No value cell: nothing below can be evaluated safely.
		return key, ""
	}

	values, editable := CheckForEditable(values)
	if editable {
		if len(values[0].Children) > 0 {
			val = values[0].Children[0].Data()
		}
		return key, val
	}

	if available, status := CheckForAvailable(values[0]); available {
		if status {
			return key, "Dostupno"
		}
		return key, "Nedostupno"
	}

	if len(values[0].Children) > 0 {
		val = html.UnescapeString(values[0].Children[0].Data())
		val = strings.Trim(val, " \t\n\r")
	}

	return key, val
}
예제 #12
0
// parseLinks parses content as a document and returns the href value of every
// "a" element attribute whose value starts with "http".
//
// If the document cannot be built, the error from transform.NewDoc is returned
// and urls is nil.
func parseLinks(content string) (urls []string, err error) {
	doc, err := transform.NewDoc(content)
	if err != nil {
		return urls, err
	}

	for _, anchor := range transform.NewSelectorQuery("a").Apply(doc) {
		for _, attr := range anchor.Attr {
			if attr.Name != "href" || !strings.HasPrefix(attr.Value, "http") {
				continue
			}
			urls = append(urls, attr.Value)
		}
	}

	return urls, nil
}
예제 #13
0
// getPinCommenters returns the names of people who commented on a pin.
//
// html is parsed into a document and queried with the selector chain
// "div.comment convo clearfix" > "p" > "a". For every match, the String() of
// its first child (expected to be the text node holding the name) is
// collected.
//
// The result is always a non-nil slice. It is empty when the markup cannot be
// parsed or nothing matches; matches without children are skipped.
func getPinCommenters(html string) []string {
	names := []string{}

	doc, err := transform.NewDoc(html)
	if err != nil {
		// Do not run the query against a document that failed to parse.
		return names
	}

	nodes := transform.NewSelectorQuery("div.comment convo clearfix", "p", "a").Apply(doc)
	for _, n := range nodes {
		if len(n.Children) == 0 {
			continue
		}
		names = append(names, n.Children[0].String())
	}

	return names
}
예제 #14
0
// getPinners returns the names of people who created a pin.
//
// html is parsed into a document and queried with the selector chain
// "div.convo attribution clearfix" > "p". For every match, the name is read
// from the first child of the match's second child; that position was
// determined by inspecting the markup of the Pinterest front page.
//
// The result is always a non-nil slice. It is empty when the markup cannot be
// parsed or nothing matches; matches that lack the expected children are
// skipped.
func getPinners(html string) []string {
	names := []string{}

	doc, err := transform.NewDoc(html)
	if err != nil {
		// Do not run the query against a document that failed to parse.
		return names
	}

	nodes := transform.NewSelectorQuery("div.convo attribution clearfix", "p").Apply(doc)
	for _, n := range nodes {
		if len(n.Children) < 2 || len(n.Children[1].Children) == 0 {
			continue
		}
		names = append(names, n.Children[1].Children[0].String())
	}

	return names
}
예제 #15
0
파일: main.go 프로젝트: bbezuk/scrapper
// ParseGeometry returns the HTML-unescaped src attribute of the first child of
// the first element matching "#geometry_image".
//
// It returns "" when there is no match, when the matched element has no
// children, or when the child has no src attribute. If src appears more than
// once, the last occurrence wins.
func ParseGeometry(node *h5.Node) string {
	matches := transform.NewSelectorQuery("#geometry_image").Apply(node)
	if len(matches) == 0 || len(matches[0].Children) == 0 {
		return ""
	}

	var src string
	for _, attr := range matches[0].Children[0].Attr {
		if attr.Name == "src" {
			src = attr.Value
		}
	}

	return html.UnescapeString(src)
}
예제 #16
0
파일: main.go 프로젝트: bbezuk/scrapper
// ParseTitle returns the title text found under node.
//
// It descends through the selectors "#primary_block" and then "h2", taking the
// first match at each step, and returns the Data() of the heading's first
// child. The text is HTML-unescaped before it is returned because it comes
// straight from parsed HTML.
//
// It returns "No match" when either selector matches nothing or when the
// heading has no children.
func ParseTitle(node *h5.Node) string {
	const noMatch = "No match"

	current := node
	for _, step := range []string{"#primary_block", "h2"} {
		matches := transform.NewSelectorQuery(step).Apply(current)
		if len(matches) == 0 {
			return noMatch
		}
		current = matches[0]
	}

	// An empty heading has no text node to read.
	if len(current.Children) == 0 {
		return noMatch
	}

	return html.UnescapeString(current.Children[0].Data())
}
예제 #17
0
파일: main.go 프로젝트: bbezuk/scrapper
// ParseThumbs returns image URLs for every "img" element under node.
//
// For each image with a non-empty src attribute two URLs are emitted, both
// prefixed with *url: the src as given, and a variant with the first "medium"
// replaced by "thickbox". Images without a src are skipped. A nil url is
// treated as an empty prefix.
//
// The returned slice is nil when no image produced a URL.
func ParseThumbs(node *h5.Node, url *string) (list []string) {
	var base string
	if url != nil {
		base = *url
	}

	for _, img := range transform.NewSelectorQuery("img").Apply(node) {
		// src is scoped to the iteration so that an image without a src
		// attribute cannot silently reuse the previous image's value.
		var src string
		for _, attr := range img.Attr {
			if attr.Name == "src" {
				src = attr.Value
			}
		}
		if src == "" {
			continue
		}

		largeSrc := strings.Replace(src, "medium", "thickbox", 1)
		list = append(list,
			fmt.Sprintf("%s%s", base, src),
			fmt.Sprintf("%s%s", base, largeSrc),
		)
	}

	return list
}
예제 #18
0
파일: gettit.go 프로젝트: kierdavis/gettit
import (
	"code.google.com/p/go-html-transform/h5"
	"code.google.com/p/go-html-transform/html/transform"
	"fmt"
	"github.com/kierdavis/ansi"
	"io"
	"net/http"
	"os"
	"os/exec"
	"path/filepath"
	"strconv"
	"strings"
)

// Package-level selector queries, built once and reused.
var (
	// DownloadPageURLSelector is a selector chain that descends from "html"
	// to an "a" element inside ".user-action user-action-download".
	// NOTE(review): presumably locates the link to a plugin's download page — confirm against the caller.
	DownloadPageURLSelector = transform.NewSelectorQuery(
		"html", "body", "section", "div", "section", "div", "div", "div",
		".unit size1of3 lastUnit", "ul", ".user-action user-action-download", "a",
	)

	// DownloadURLSelector is a selector chain that descends from "html" to an
	// "a" element nested in "li" > "span" under ".unit size1of3 lastUnit".
	// NOTE(review): presumably locates the direct file download link — confirm against the caller.
	DownloadURLSelector = transform.NewSelectorQuery(
		"html", "body", "section", "div", "section", "div", "div", "section", "header", "div",
		".unit size1of3 lastUnit", "ul", "li", "span", "a",
	)
)

// GetAttr returns the value of the first attribute of node whose name equals
// name, or "" when node has no such attribute.
func GetAttr(node *h5.Node, name string) (value string) {
	for i := range node.Attr {
		if candidate := node.Attr[i]; candidate.Name == name {
			value = candidate.Value
			break
		}
	}

	return value
}

func GetDownloadPageURL(pluginName string) (downloadPageURL string, err error) {
	pluginPageURL := "http://dev.bukkit.org/server-mods/" + pluginName + "/"
	fmt.Printf("[%s] Fetching %s\n", pluginName, pluginPageURL)
예제 #19
0
파일: eztv.go 프로젝트: sunclx/magnetHoover
// sel builds a selector query from selector, applies it to n and returns the
// matching nodes.
func sel(n *h5.Node, selector string) []*h5.Node {
	query := transform.NewSelectorQuery(selector)
	return query.Apply(n)
}