func ParseImages(node *h5.Node) (list []string) { var coreUrl string = "http://keindl-sport.hr" var baseSrc string query := transform.NewSelectorQuery("#bigpic") result := query.Apply(node) if len(result) > 0 { for _, attr := range result[0].Attr { if attr.Name == "src" { baseSrc = attr.Value } } list = append(list, fmt.Sprintf("%s%s", coreUrl, baseSrc)) largeSrc := strings.Replace(baseSrc, "large", "thickbox", 1) list = append(list, fmt.Sprintf("%s%s", coreUrl, largeSrc)) } query = transform.NewSelectorQuery("#thumbs_list_frame") result = query.Apply(node) if len(result) > 0 { list = append(list, ParseThumbs(result[0], &coreUrl)...) } return }
func ParseDescriptions(node *h5.Node) (list []string) { query := transform.NewSelectorQuery("#idTab1") result := query.Apply(node) if len(result) < 1 { return } for _, chapter := range result[0].Children { var data string query = transform.NewSelectorQuery("span") med := query.Apply(chapter) if len(med) < 1 { continue } if len(med[0].Children) > 1 { data = ExtractSpecialDescription(med[0].Children) } else { data = med[0].Children[0].Data() } list = append(list, data) } return }
func eztvSnipp(r *h5.Node) []string { c := transform.NewSelectorQuery("td.forum_thread_post").Apply(r) id := transform.NewSelectorQuery("a").Apply(c[1])[0].Children[0].Data() ls := transform.NewSelectorQuery("a").Apply(c[2]) var u string for _, l := range ls { u = eztvLinkHref(l) if strings.Split(u, ":")[0] == "magnet" { break } } return []string{id, u} }
func ParseExtras(node *h5.Node) (list []Selectable) { query := transform.NewSelectorQuery("#attributes") result := query.Apply(node) if len(result) != 1 { return } for _, set := range result[0].Children { if len(set.Children) == 0 { continue } name := ExtractExtraName(set) opts := ExtractExtraVals(set) sel := Selectable{Name: name, Options: opts} list = append(list, sel) } return }
func ParseFeatures(node *h5.Node) (list map[string]string) { selector := []string{".product_short_features_list nuc_short_festures_list", "table", "tbody", "tr"} tNode := node var result []*h5.Node list = make(map[string]string) for _, sel := range selector { query := transform.NewSelectorQuery(sel) result = query.Apply(tNode) if len(result) < 1 { return nil } tNode = result[0] } for _, res := range result { key, val := ExtractFeature(res) list[key] = val } return }
func ExtractDetail(node *h5.Node) (key, val string) { query := transform.NewSelectorQuery(".product_feature_name") result := query.Apply(node) key = strings.Trim(html.UnescapeString(result[0].Children[0].Data()), " :\t\n\r") query = transform.NewSelectorQuery(".product_feature_value") result = query.Apply(node) val = html.UnescapeString(result[0].Children[0].Data()) val = strings.Trim(val, " :\t\n\r") return }
func ParseDetails(node *h5.Node) (list map[string]string) { list = make(map[string]string) query := transform.NewSelectorQuery("#idTab2") result := query.Apply(node) if len(result) > 0 { query = transform.NewSelectorQuery("tr") row := query.Apply(result[0]) for _, res := range row { key, val := ExtractDetail(res) list[key] = val } } return }
func ParseCategory(node *h5.Node) string { query := transform.NewSelectorQuery(".navigation_end") result := query.Apply(node) if len(result) < 1 { return "" } return html.UnescapeString(result[0].Children[0].Children[0].Data()) }
func CheckForAvailable(node *h5.Node) (bool, bool) { query := transform.NewSelectorQuery(".not_available") result := query.Apply(node) if len(result) == 1 { return true, false } query = transform.NewSelectorQuery(".available") result = query.Apply(node) if len(result) == 1 { return true, true } return false, false }
func CheckForEditable(node []*h5.Node) ([]*h5.Node, bool) { query := transform.NewSelectorQuery(".editable") result := query.Apply(node[0]) if len(result) == 1 { return result, true } return node, false }
func ExtractFeature(node *h5.Node) (key, val string) { query := transform.NewSelectorQuery(".feature_name") result := query.Apply(node) key = html.UnescapeString(result[0].Children[0].Data()) query = transform.NewSelectorQuery(".feature_value") result = query.Apply(node) result, editable := CheckForEditable(result) if editable { val = result[0].Children[0].Data() return } available, status := CheckForAvailable(result[0]) if available { if status { val = "Dostupno" } else { val = "Nedostupno" } return } val = html.UnescapeString(result[0].Children[0].Data()) val = strings.Trim(val, " \t\n\r") return }
func parseLinks(content string) (urls []string, err error) { doc, err := transform.NewDoc(content) if err == nil { selector := transform.NewSelectorQuery("a") nodes := selector.Apply(doc) for i := 0; i < len(nodes); i++ { for j := 0; j < len(nodes[i].Attr); j++ { if nodes[i].Attr[j].Name == "href" && strings.HasPrefix(nodes[i].Attr[j].Value, "http") { urls = append(urls, nodes[i].Attr[j].Value) } } } } return urls, err }
//Get the names of people who commented on a pin func getPinCommenters(html string) []string { doc, _ := transform.NewDoc(html) selector := transform.NewSelectorQuery("div.comment convo clearfix", "p", "a") //Find all the dom nodes that match the selector above nodes := selector.Apply(doc) names := []string{} for i := range nodes { //The first child of the node will be the TextNode (the content) //of the tag. This contains the name we want so append to our output array names = append(names, nodes[i].Children[0].String()) } return names }
//Get the names of people who created a pin func getPinners(html string) []string { doc, _ := transform.NewDoc(html) selector := transform.NewSelectorQuery("div.convo attribution clearfix", "p") //Find all the dom nodes that match the selector above nodes := selector.Apply(doc) names := []string{} for i := range nodes { //The name we are looking for is in a different place for the pinner //than in the above function (this was found by just looking at the source generated //for the pinterest front page) names = append(names, nodes[i].Children[1].Children[0].String()) } return names }
func ParseGeometry(node *h5.Node) string { query := transform.NewSelectorQuery("#geometry_image") result := query.Apply(node) var baseSrc string if len(result) > 0 { for _, attr := range result[0].Children[0].Attr { if attr.Name == "src" { baseSrc = attr.Value } } return html.UnescapeString(baseSrc) } return "" }
//Function to parse title, all other parsing functions are similar and won't be explained, //Important thing to note is unescaping of string at the end, since it is html we are parsing after all func ParseTitle(node *h5.Node) string { selector := []string{"#primary_block", "h2"} tNode := node var result []*h5.Node for _, sel := range selector { query := transform.NewSelectorQuery(sel) result = query.Apply(tNode) if len(result) < 1 { return "No match" } tNode = result[0] } return html.UnescapeString(result[0].Children[0].Data()) }
func ParseThumbs(node *h5.Node, url *string) (list []string) { query := transform.NewSelectorQuery("img") result := query.Apply(node) var baseSrc string for _, res := range result { for _, attr := range res.Attr { if attr.Name == "src" { baseSrc = attr.Value } } list = append(list, fmt.Sprintf("%s%s", *url, baseSrc)) largeSrc := strings.Replace(baseSrc, "medium", "thickbox", 1) list = append(list, fmt.Sprintf("%s%s", *url, largeSrc)) } return }
import (
	"code.google.com/p/go-html-transform/h5"
	"code.google.com/p/go-html-transform/html/transform"
	"fmt"
	"github.com/kierdavis/ansi"
	"io"
	"net/http"
	"os"
	"os/exec"
	"path/filepath"
	"strconv"
	"strings"
)

// DownloadPageURLSelector is a selector chain running from the document root
// down to an "a" element inside a ".user-action user-action-download" item.
// NOTE(review): presumably the link to a plugin's download page - confirm
// against the code that applies it.
var DownloadPageURLSelector = transform.NewSelectorQuery("html", "body", "section", "div", "section", "div", "div", "div", ".unit size1of3 lastUnit", "ul", ".user-action user-action-download", "a")

// DownloadURLSelector is a selector chain running from the document root
// down to an "a" element inside a header list item.
// NOTE(review): presumably the direct file link on the download page -
// confirm against the code that applies it.
var DownloadURLSelector = transform.NewSelectorQuery("html", "body", "section", "div", "section", "div", "div", "section", "header", "div", ".unit size1of3 lastUnit", "ul", "li", "span", "a")

// GetAttr returns the value of the first attribute of node whose name equals
// name, or "" when node has no such attribute.
func GetAttr(node *h5.Node, name string) (value string) {
	for _, attr := range node.Attr {
		if attr.Name == name {
			return attr.Value
		}
	}
	return ""
}

func GetDownloadPageURL(pluginName string) (downloadPageURL string, err error) {
	pluginPageURL := "http://dev.bukkit.org/server-mods/" + pluginName + "/"
	fmt.Printf("[%s] Fetching %s\n", pluginName, pluginPageURL)
// sel applies selector to n and returns the matching nodes.
func sel(n *h5.Node, selector string) []*h5.Node {
	query := transform.NewSelectorQuery(selector)
	return query.Apply(n)
}