Golang Selection.Get Examples

Programming Language: Golang

Namespace/Package Name: github.com/PuerkitoBio/goquery

Class/Type: Selection

Method/Function: Get

Examples at hotexamples.com: 14

Golang Selection.Get - 14 examples found. These are the top rated real world Golang examples of github.com/PuerkitoBio/goquery.Selection.Get extracted from open source projects. You can rate examples to help us improve the quality of examples.

Frequently Used Methods

Show Hide

Attr(30)

Text(30)

Find(30)

Get(14)

Each(14)

Length(9)

Children(9)

Html(8)

Parent(6)

Is(4)

Nodes(4)

First(4)

FindMatcher(4)

Prev(4)

Eq(3)

Map(3)

ReplaceWithHtml(3)

Contents(3)

Closest(3)

SetAttr(3)

Size(3)

Union(2)

HasClass(2)

EachWithBreak(2)

Next(2)

Clone(2)

AttrOr(2)

AppendSelection(1)

Not(1)

FilterMatcher(1)

Parents(1)

Empty(1)

Remove(1)

RemoveFiltered(1)

Siblings(1)

ChildrenFiltered(1)

AppendNodes(1)

Example #1

Show file

File: cleaner.go Project: ngs/GoOse

// replaceWithPara retags the first node of div as a <p> element by rewriting
// both its tag name and its atom in place. A selection whose Size is not
// positive is left untouched.
func (this *cleaner) replaceWithPara(div *goquery.Selection) {
	if div.Size() <= 0 {
		return
	}
	first := div.Get(0)
	first.Data, first.DataAtom = atom.P.String(), atom.P
}

Example #2

Show file

File: extractor.go Project: ngs/GoOse

// isBoostable reports whether node is closely followed by a paragraph that
// carries enough weight to justify boosting node's parent.
//
// The first paragraph is often just the caption under an image, so before
// boosting a parent we want the node to be connected to other paragraphs.
// The following siblings of node are walked in document order:
//   - a <p> sibling containing more than 5 stop words makes node boostable;
//   - reaching a <p> sibling that is 3 or more steps away ends the search
//     with false;
//   - running out of siblings also yields false.
func (this *contentExtractor) isBoostable(node *goquery.Selection) bool {
	stepsAway := 0
	// goquery traversal methods return an empty selection (never nil) when
	// there is nothing left, so emptiness is the termination test. It also
	// guarantees Get(0) below has a node to return.
	for next := node.Next(); next.Length() > 0; next = next.Next() {
		// Inspect the sibling currently being visited, not the starting node.
		if next.Get(0).DataAtom.String() == "p" {
			if stepsAway >= 3 {
				if this.config.debug {
					log.Println("Next paragraph is too far away, not boosting")
				}
				return false
			}

			paraText := next.Text()
			ws := this.config.stopWords.stopWordsCount(this.config.targetLanguage, paraText)
			if ws.stopWordCount > 5 {
				if this.config.debug {
					log.Println("We're gonna boost this node, seems content")
				}
				return true
			}
		}

		stepsAway++
	}

	return false
}

Example #3

Show file

File: extractor.go Project: ngs/GoOse

// getSiblingsContent collects the paragraph content worth keeping from
// currentSibling.
//
// If currentSibling is itself a non-empty <p>, it is returned as the only
// element. Otherwise every descendant <p> with text is scored by its stop
// word count; a paragraph is kept when 0.30 * baselinescoreSiblingsPara is
// below that count and the paragraph is not reported as high link density.
// Kept paragraphs are returned as fresh single-node selections wrapping a
// text node that holds the paragraph text. The result is never nil.
func (this *contentExtractor) getSiblingsContent(currentSibling *goquery.Selection, baselinescoreSiblingsPara float64) []*goquery.Selection {
	collected := []*goquery.Selection{}
	if currentSibling.Get(0).DataAtom.String() == "p" && len(currentSibling.Text()) > 0 {
		return append(collected, currentSibling)
	}

	const siblingBaselineScore = 0.30
	currentSibling.Find("p").Each(func(_ int, para *goquery.Selection) {
		content := para.Text()
		if len(content) == 0 {
			return
		}

		stats := this.config.stopWords.stopWordsCount(this.config.targetLanguage, content)
		linkHeavy := this.isHighLinkDensity(para)
		threshold := siblingBaselineScore * baselinescoreSiblingsPara
		if threshold < float64(stats.stopWordCount) && !linkHeavy {
			textNode := &html.Node{
				Type:     html.TextNode,
				Data:     content,
				DataAtom: atom.P,
			}
			collected = append(collected, &goquery.Selection{Nodes: []*html.Node{textNode}})
		}
	})
	return collected
}

Example #4

Show file

File: wiki.go Project: roth1002/apex

// node renders the first node of s as a string for terminal output.
//
// Headings, paragraphs, preformatted/highlighted blocks and inline code are
// wrapped in ANSI escape sequences; links are rendered as "text (href)",
// list items and lists delegate to contents and nodes, and bare text nodes
// are returned with surrounding whitespace trimmed. Anything else renders as
// the empty string. The checks run in a fixed priority order: h1, h2, h3, p,
// then pre or the "highlight" class, then a, li, ul, code, then text nodes.
func node(i int, s *goquery.Selection) string {
	first := s.Get(0)

	switch first.Data {
	case "h1":
		return fmt.Sprintf(" \033[%dm# %s\033[0m\n\n", blue, text(s))
	case "h2":
		return fmt.Sprintf(" \033[%dm## %s\033[0m\n\n", blue, text(s))
	case "h3":
		return fmt.Sprintf(" \033[%dm### %s\033[0m\n\n", blue, text(s))
	case "p":
		return fmt.Sprintf("\033[%dm%s\033[0m\n\n", none, indent(text(s), 1))
	}

	// The highlight class takes precedence over the remaining tag checks.
	if first.Data == "pre" || s.HasClass("highlight") {
		return fmt.Sprintf("\033[1m%s\033[0m\n\n", indent(text(s), 2))
	}

	switch first.Data {
	case "a":
		return fmt.Sprintf("%s (%s) ", s.Text(), s.AttrOr("href", "missing link"))
	case "li":
		return fmt.Sprintf("  • %s\n", contents(s))
	case "ul":
		return fmt.Sprintf("%s\n", nodes(s))
	case "code":
		return fmt.Sprintf("\033[1m%s\033[0m ", s.Text())
	}

	if first.Type == html.TextNode {
		return strings.TrimSpace(first.Data)
	}
	return ""
}

Example #5

Show file

File: parser.go Project: hotei/GoOse

// delAttr removes the attribute named attr from the first node of selection.
// It does nothing when indexOfAttribute reports that the attribute is absent.
func (this *parser) delAttr(selection *goquery.Selection, attr string) {
	pos := this.indexOfAttribute(selection, attr)
	if pos < 0 {
		return
	}
	target := selection.Get(0)
	// Splice the attribute out of the slice in place.
	target.Attr = append(target.Attr[:pos], target.Attr[pos+1:]...)
}

Example #6

Show file

File: parser.go Project: hotei/GoOse

// removeNode detaches the first node of selection from its parent.
//
// It is a no-op when selection is nil or empty, or when the node has no
// parent to be removed from.
func (this *parser) removeNode(selection *goquery.Selection) {
	// Get(0) panics on an empty selection instead of returning nil, so
	// emptiness has to be ruled out before calling it.
	if selection == nil || selection.Length() == 0 {
		return
	}
	node := selection.Get(0)
	if node != nil && node.Parent != nil {
		node.Parent.RemoveChild(node)
	}
}

Example #7

Show file

File: extractor.go Project: ngs/GoOse

// isNodescoreThresholdMet reports whether e scores well enough relative to
// node to be kept. The threshold is 8% of node's gravity score; an element
// scoring below it is rejected unless its first node is a <td>.
func (this *contentExtractor) isNodescoreThresholdMet(node *goquery.Selection, e *goquery.Selection) bool {
	topScore := this.getNodeGravityScore(node)
	candidateScore := this.getNodeGravityScore(e)

	belowThreshold := float64(candidateScore) < float64(topScore)*0.08
	// The tag is only inspected for elements that missed the threshold.
	return !belowThreshold || e.Get(0).DataAtom.String() == "td"
}

Example #8

Show file

File: parser.go Project: hotei/GoOse

// indexOfAttribute returns the position of the attribute named attr in the
// attribute list of selection's first node, or -1 when there is no such
// attribute. A nil or empty selection has no attributes, so it also yields -1.
func (this *parser) indexOfAttribute(selection *goquery.Selection, attr string) int {
	// Get(0) panics on an empty selection, so rule that out first.
	if selection == nil || selection.Length() == 0 {
		return -1
	}
	node := selection.Get(0)
	for i, a := range node.Attr {
		if a.Key == attr {
			return i
		}
	}
	return -1
}

Example #9

Show file

File: readability.go Project: jpoehls/feedmailer

// removeNodes detaches every element of s from its parent node.
// Elements for which Parent() yields an empty selection are left untouched.
func removeNodes(s *goquery.Selection) {
	s.Each(func(_ int, item *goquery.Selection) {
		owner := item.Parent()
		if owner.Length() != 0 {
			owner.Get(0).RemoveChild(item.Get(0))
		}
		// TODO: decide what to do with elements that have no parent.
	})
}

Example #10

Show file

File: videos.go Project: hotei/GoOse

// getVideo assembles a video value from node: its embed code, the tag name of
// its first node as the embed type, its width and height, its source, and the
// provider derived from that source.
func (ve *VideoExtractor) getVideo(node *goquery.Selection) video {
	source := ve.getSrc(node)
	code := ve.getEmbedCode(node)
	kind := node.Get(0).DataAtom.String()
	w := ve.getWidth(node)
	h := ve.getHeight(node)

	return video{
		embedCode: code,
		embedType: kind,
		width:     w,
		height:    h,
		src:       source,
		provider:  ve.getProvider(source),
	}
}

Example #11

Show file

File: extractor.go Project: ngs/GoOse

// isTableAndNoParaExist first removes every descendant <p> of selection whose
// text is shorter than 25 bytes, then reports whether no <p> descendants
// remain and the first node of selection is not a <td>.
//
// Note that this mutates the document tree as a side effect.
func (this *contentExtractor) isTableAndNoParaExist(selection *goquery.Selection) bool {
	selection.Find("p").Each(func(_ int, para *goquery.Selection) {
		if len(para.Text()) >= 25 {
			return
		}
		short := para.Get(0)
		short.Parent.RemoveChild(short)
	})

	// Query again: the pass above may have removed paragraphs.
	if selection.Find("p").Length() != 0 {
		return false
	}
	return selection.Get(0).DataAtom.String() != "td"
}

Example #12

Show file

File: parser.go Project: hotei/GoOse

// setAttr sets attribute attr to value on the first node of selection.
//
// The node's attribute list is rebuilt: every existing attribute with a
// different key is copied over (Key and Val only), and the new attribute is
// appended at the end. A selection whose Size is not positive is left
// untouched.
func (this *parser) setAttr(selection *goquery.Selection, attr string, value string) {
	if selection.Size() <= 0 {
		return
	}

	target := selection.Get(0)
	rebuilt := make([]html.Attribute, 0, len(target.Attr)+1)
	for _, existing := range target.Attr {
		if existing.Key == attr {
			continue
		}
		rebuilt = append(rebuilt, html.Attribute{Key: existing.Key, Val: existing.Val})
	}
	target.Attr = append(rebuilt, html.Attribute{Key: attr, Val: value})
}

Example #13

Show file

File: OldIndex.go Project: captainju/indexof

// extractData resolves the value of the first attribute of tds' first node
// against parsed_url and dispatches the resulting URL.
//
// URLs already present in visited_urls are ignored. If the resolved path
// contains a ".", the URL is sent on result_chan; otherwise it is recorded in
// visited_urls and handed to newSearch. It panics when the attribute value
// cannot be parsed as a URL.
func extractData(tds *goquery.Selection, parsed_url *url.URL, visited_urls map[string]string, result_chan chan string) {
	firstAttr := tds.Get(0).Attr[0]

	ref, err := url.Parse(firstAttr.Val)
	if err != nil {
		panic(err)
	}

	target := parsed_url.ResolveReference(ref)
	full := target.String()
	if _, seen := visited_urls[full]; seen {
		return
	}

	if strings.Contains(target.Path, ".") {
		result_chan <- full
		return
	}

	visited_urls[full] = full
	newSearch(full, &visited_urls, result_chan)
}

Example #14

Show file

File: readability.go Project: jpoehls/feedmailer

// cleanConditionally removes descendants of s matching selector that look
// like non-content. It does nothing unless d.CleanConditionally is set.
//
// An element is removed outright when its class weight plus its candidate
// content score (0 if it is not a candidate) is negative. Otherwise, if its
// text contains fewer than 10 commas, a series of heuristics on tag counts,
// text length and link density is applied in order; the first one that
// matches removes the element and is logged as the reason.
func (d *Document) cleanConditionally(s *goquery.Selection, selector string) {
	if !d.CleanConditionally {
		return
	}

	s.Find(selector).Each(func(_ int, el *goquery.Selection) {
		node := el.Get(0)
		weight := float32(d.classWeight(el))

		var contentScore float32
		if candidate, found := d.candidates[node]; found {
			contentScore = candidate.score
		}

		if weight+contentScore < 0 {
			removeNodes(el)
			Logger.Printf("Conditionally cleaned %s%s with weight %f and content score %f\n", node.Data, getName(el), weight, contentScore)
			return
		}

		text := el.Text()
		if strings.Count(text, ",") >= 10 {
			return
		}

		// Count descendants per tag of interest.
		tags := []string{"p", "img", "li", "a", "embed", "input"}
		counts := make(map[string]int, len(tags))
		for _, tag := range tags {
			counts[tag] = el.Find(tag).Length()
		}
		// The <li> count is offset by 100 before being compared to <p>s.
		counts["li"] -= 100

		contentLength := len(strings.TrimSpace(text))
		linkDensity := d.getLinkDensity(el)

		var reason string
		switch {
		case counts["img"] > counts["p"]:
			reason = "too many images"
		case counts["li"] > counts["p"] && !el.Is("ul,ol"):
			reason = "more <li>s than <p>s"
		case counts["input"] > counts["p"]/3:
			reason = "less than 3x <p>s than <input>s"
		case contentLength < d.MinTextLength && (counts["img"] == 0 || counts["img"] > 2):
			reason = "too short content length without a single image"
		case weight < 25 && linkDensity > 0.2:
			reason = fmt.Sprintf("too many links for its weight (%f)", weight)
		case weight >= 25 && linkDensity > 0.5:
			reason = fmt.Sprintf("too many links for its weight (%f)", weight)
		case (counts["embed"] == 1 && contentLength < 75) || counts["embed"] > 1:
			reason = "<embed>s with too short a content length, or too many <embed>s"
		default:
			// No heuristic matched: keep the element.
			return
		}

		Logger.Printf("Conditionally cleaned %s%s with weight %f and content score %f because it has %s\n", node.Data, getName(el), weight, contentScore, reason)
		removeNodes(el)
	})
}