Golang Selection.Find Examples

Programming Language: Golang

Namespace/Package Name: github.com/advancedlogic/goquery

Class/Type: Selection

Method/Function: Find

Examples at hotexamples.com: 6

Golang Selection.Find - 6 examples found. These are the top-rated real-world Golang examples of github.com/advancedlogic/goquery.Selection.Find extracted from open-source projects. You can rate the examples to help us improve their quality.

Frequently Used Methods

Show Hide

Get(10)

Attr(7)

Find(6)

Text(4)

Nodes(3)

Each(2)

Prev(2)

Size(2)

Union(2)

Next(1)

Parent(1)

Siblings(1)

Example #1

Show file

File: extractor.go Project: minond/GoOse

// getSiblingsContent gathers paragraph selections from currentSibling.
//
// If the first node of currentSibling is itself a <p> with non-empty text, the
// sibling is returned as the single result. Otherwise each descendant <p> with
// non-empty text is scored by its stop-word count; a paragraph is kept when that
// count exceeds 0.30 * baselinescoreSiblingsPara and it is not flagged by
// isHighLinkDensity. Kept paragraphs are returned as newly built one-node
// selections holding the paragraph text, not as the original DOM nodes.
//
// The result is never nil; it is empty when nothing qualifies.
func (this *contentExtractor) getSiblingsContent(currentSibling *goquery.Selection, baselinescoreSiblingsPara float64) []*goquery.Selection {
	isParagraph := currentSibling.Get(0).DataAtom.String() == "p"
	if isParagraph && len(currentSibling.Text()) > 0 {
		return []*goquery.Selection{currentSibling}
	}

	// The threshold does not depend on the paragraph, so compute it once.
	threshold := 0.30 * baselinescoreSiblingsPara
	kept := []*goquery.Selection{}

	currentSibling.Find("p").Each(func(_ int, para *goquery.Selection) {
		content := para.Text()
		if len(content) == 0 {
			return
		}

		counts := this.config.stopWords.stopWordsCount(this.config.targetLanguage, content)
		// Evaluated for every non-empty paragraph, before the score comparison.
		linkHeavy := this.isHighLinkDensity(para)
		if linkHeavy || threshold >= float64(counts.stopWordCount) {
			return
		}

		// Wrap the paragraph text in a standalone text node tagged as <p>.
		textNode := &html.Node{
			Type:     html.TextNode,
			Data:     content,
			DataAtom: atom.P,
		}
		kept = append(kept, &goquery.Selection{Nodes: []*html.Node{textNode}})
	})

	return kept
}

Example #2

Show file

File: extractor.go Project: minond/GoOse

// isHighLinkDensity reports whether node's text is dominated by link text.
//
// The score is (words inside <a> descendants / words in the whole node)
// multiplied by the number of <a> descendants, where "words" are the pieces
// produced by splitting on a single space. A score above 1.0 counts as high
// density. A node without any <a> descendant is never high density.
//
// When config.debug is set, the score and up to the first 50 bytes of the
// node's text are logged.
func (this *contentExtractor) isHighLinkDensity(node *goquery.Selection) bool {
	anchors := node.Find("a")
	if anchors == nil || anchors.Size() == 0 {
		return false
	}

	text := node.Text()
	// strings.Split with a non-empty separator always yields at least one
	// element, so the divisor below is never zero.
	totalWords := len(strings.Split(text, " "))

	var anchorTexts []string
	anchors.Each(func(_ int, anchor *goquery.Selection) {
		anchorTexts = append(anchorTexts, anchor.Text())
	})
	// Link texts are concatenated without a separator before being split.
	anchorWords := len(strings.Split(strings.Join(anchorTexts, ""), " "))

	ratio := float64(anchorWords) / float64(totalWords)
	score := ratio * float64(anchors.Size())

	if this.config.debug {
		preview := text
		if len(preview) >= 51 {
			preview = preview[0:50]
		}
		log.Printf("Calculated link density score as %1.5f for node %s\n", score, preview)
	}

	return score > 1.0
}

Example #3

Show file

File: parser.go Project: postfix/GoOse

// getElementsByTags returns the union of the descendants of div matched by
// each selector in tags. It starts from an empty selection, so the result is
// non-nil even when tags is empty.
func (p Parser) getElementsByTags(div *goquery.Selection, tags []string) *goquery.Selection {
	merged := &goquery.Selection{}
	for i := range tags {
		found := div.Find(tags[i])
		if found == nil {
			continue
		}
		merged = merged.Union(found)
	}
	return merged
}

Example #4

Show file

File: extractor.go Project: minond/GoOse

// isTableAndNoParaExist prunes short paragraphs and then reports whether none
// are left.
//
// Side effect: every descendant <p> of selection whose text is shorter than 25
// bytes is detached from its parent node. After pruning, the function returns
// true when no <p> descendants remain and the first node of selection is not
// a <td>.
func (this *contentExtractor) isTableAndNoParaExist(selection *goquery.Selection) bool {
	selection.Find("p").Each(func(_ int, para *goquery.Selection) {
		if len(para.Text()) >= 25 {
			return
		}
		short := para.Get(0)
		short.Parent.RemoveChild(short)
	})

	// Query again so the paragraphs removed above are not counted.
	remaining := selection.Find("p")
	return remaining.Length() == 0 && selection.Get(0).DataAtom.String() != "td"
}

Example #5

Show file

File: extractor.go Project: minond/GoOse

// getSiblingsScore returns the baseline score used when judging siblings of
// topNode.
//
// Long articles can contain many paragraphs, so comparing a sibling against the
// summed score of all of them would be unfair. Instead the baseline is the
// average stop-word count of the qualifying paragraphs under topNode: e.g. ten
// paragraphs totalling 1000 give a baseline of 100. A paragraph qualifies when
// it has more than two stop words and is not flagged by isHighLinkDensity.
//
// The average uses integer division. If no paragraph qualifies, 100000 is
// returned.
func (this *contentExtractor) getSiblingsScore(topNode *goquery.Selection) int {
	var qualifying, total int

	topNode.Find("p").Each(func(_ int, para *goquery.Selection) {
		counts := this.config.stopWords.stopWordsCount(this.config.targetLanguage, para.Text())
		// The link-density check runs for every paragraph, even those with too
		// few stop words.
		if this.isHighLinkDensity(para) || counts.stopWordCount <= 2 {
			return
		}
		qualifying++
		total += counts.stopWordCount
	})

	if qualifying > 0 {
		return total / qualifying
	}
	return 100000
}

Example #6

Show file

File: videos.go Project: minond/GoOse

// getObjectTag builds a video from an <object>-style node.
//
// Any <embed> descendant that is present in ve.candidates is removed from the
// candidates first. The source URL is read from the "value" attribute of the
// descendant matching param[name="movie"]. A zero-value video is returned when
// that element is missing or when getProvider yields an empty provider for the
// source. Otherwise the video produced by getVideo is returned with its
// provider and src fields filled in.
func (ve *VideoExtractor) getObjectTag(node *goquery.Selection) video {
	if embed := node.Find("embed"); ve.candidates.Has(embed) {
		ve.candidates.Remove(embed)
	}

	movieParam := node.Find(`param[name="movie"]`)
	if movieParam == nil || movieParam.Length() == 0 {
		return video{}
	}

	// The "exists" flag from Attr is discarded; src is passed on as returned.
	src, _ := movieParam.Attr("value")
	provider := ve.getProvider(src)
	if provider == "" {
		return video{}
	}

	result := ve.getVideo(node)
	result.provider = provider
	result.src = src
	return result
}