Golang Selection.Get示例

编程语言: Golang

命名空间/包名称: github.com/PuerkitoBio/goquery

类/类型: Selection

方法/功能: Get

hotexamples.com的示例: 14

Golang Selection.Get - 共找到 14 个示例。这些是从开源项目中提取的、评价最高的 github.com/PuerkitoBio/goquery.Selection.Get 真实 Golang 用法示例。您可以对示例进行评价，帮助我们提高示例质量。

常用方法

显示隐藏

Attr(30)

Text(30)

Find(30)

Get(14)

Each(14)

Length(9)

Children(9)

Html(8)

Parent(6)

Is(4)

Nodes(4)

First(4)

FindMatcher(4)

Prev(4)

Eq(3)

Map(3)

ReplaceWithHtml(3)

Contents(3)

Closest(3)

SetAttr(3)

Size(3)

Union(2)

HasClass(2)

EachWithBreak(2)

Next(2)

Clone(2)

AttrOr(2)

AppendSelection(1)

Not(1)

FilterMatcher(1)

Parents(1)

Empty(1)

Remove(1)

RemoveFiltered(1)

Siblings(1)

ChildrenFiltered(1)

AppendNodes(1)

示例#1

显示文件

文件： cleaner.go 项目： ngs/GoOse

// replaceWithPara rewrites the first node of div in place so that its tag
// name and atom are those of a <p> element. An empty selection is left
// untouched.
func (this *cleaner) replaceWithPara(div *goquery.Selection) {
	if div.Length() == 0 {
		return
	}

	first := div.Get(0)
	first.Data = atom.P.String()
	first.DataAtom = atom.P
}

示例#2

显示文件

文件： extractor.go 项目： ngs/GoOse

// isBoostable reports whether node is followed closely enough by a
// content-bearing paragraph to justify boosting its parent.
//
// A lot of the time the first paragraph is just the caption under an image, so
// before boosting a parent node we make sure it is connected to other
// paragraphs: one of the following siblings must be a <p> that sits fewer than
// three steps away and contains more than five stop words.
func (this *contentExtractor) isBoostable(node *goquery.Selection) bool {
	// The sibling count does not change during the walk, so compute it once.
	maxSteps := node.Siblings().Length()

	stepsAway := 0
	// Next yields an empty selection (not nil) once the siblings run out, so
	// the length check is what actually stops the walk and keeps Get(0) safe.
	for next := node.Next(); next != nil && next.Length() > 0 && stepsAway < maxSteps; next = next.Next() {
		// Inspect the sibling currently being visited, not the starting node.
		if next.Get(0).DataAtom.String() == "p" {
			if stepsAway >= 3 {
				if this.config.debug {
					log.Println("Next paragraph is too far away, not boosting")
				}
				return false
			}

			paraText := next.Text()
			ws := this.config.stopWords.stopWordsCount(this.config.targetLanguage, paraText)
			if ws.stopWordCount > 5 {
				if this.config.debug {
					log.Println("We're gonna boost this node, seems content")
				}
				return true
			}
		}

		stepsAway++
	}

	return false
}

示例#3

显示文件

文件： extractor.go 项目： ngs/GoOse

// getSiblingsContent collects the paragraph selections contributed by
// currentSibling.
//
// A <p> sibling with non-empty text is returned as the only element.
// Otherwise every descendant <p> with text is scored by its stop-word count;
// those whose count exceeds 30% of baselinescoreSiblingsPara and that do not
// have a high link density are returned as newly built single-node selections
// holding the paragraph text.
func (this *contentExtractor) getSiblingsContent(currentSibling *goquery.Selection, baselinescoreSiblingsPara float64) []*goquery.Selection {
	if currentSibling.Get(0).DataAtom.String() == "p" && len(currentSibling.Text()) > 0 {
		return []*goquery.Selection{currentSibling}
	}

	const siblingBaselineScore = 0.30

	paragraphs := []*goquery.Selection{}
	currentSibling.Find("p").Each(func(_ int, para *goquery.Selection) {
		text := para.Text()
		if len(text) == 0 {
			return
		}

		stopWords := this.config.stopWords.stopWordsCount(this.config.targetLanguage, text)
		highLinkDensity := this.isHighLinkDensity(para)
		threshold := siblingBaselineScore * baselinescoreSiblingsPara
		if threshold < float64(stopWords.stopWordCount) && !highLinkDensity {
			// Wrap the paragraph text in a standalone node tagged as <p>.
			textNode := &html.Node{
				Type:     html.TextNode,
				Data:     text,
				DataAtom: atom.P,
			}
			paragraphs = append(paragraphs, &goquery.Selection{Nodes: []*html.Node{textNode}})
		}
	})
	return paragraphs
}

示例#4

显示文件

文件： wiki.go 项目： roth1002/apex

// node renders the first node of s as a string for terminal output.
//
// Headings and paragraphs are matched first, then <pre> elements or anything
// carrying the "highlight" class, then links, list items, lists and inline
// code. A text node is returned with surrounding whitespace trimmed; anything
// else renders as the empty string.
func node(i int, s *goquery.Selection) string {
	first := s.Get(0)
	tag := first.Data

	// These tags win even when the element also has the "highlight" class.
	switch tag {
	case "h1":
		return fmt.Sprintf(" \033[%dm# %s\033[0m\n\n", blue, text(s))
	case "h2":
		return fmt.Sprintf(" \033[%dm## %s\033[0m\n\n", blue, text(s))
	case "h3":
		return fmt.Sprintf(" \033[%dm### %s\033[0m\n\n", blue, text(s))
	case "p":
		return fmt.Sprintf("\033[%dm%s\033[0m\n\n", none, indent(text(s), 1))
	}

	if tag == "pre" || s.HasClass("highlight") {
		return fmt.Sprintf("\033[1m%s\033[0m\n\n", indent(text(s), 2))
	}

	switch tag {
	case "a":
		return fmt.Sprintf("%s (%s) ", s.Text(), s.AttrOr("href", "missing link"))
	case "li":
		return fmt.Sprintf("  • %s\n", contents(s))
	case "ul":
		return fmt.Sprintf("%s\n", nodes(s))
	case "code":
		return fmt.Sprintf("\033[1m%s\033[0m ", s.Text())
	}

	if first.Type == html.TextNode {
		return strings.TrimSpace(tag)
	}
	return ""
}

示例#5

显示文件

文件： parser.go 项目： hotei/GoOse

// delAttr removes the attribute named attr from the first node of selection.
// Nothing happens when indexOfAttribute reports a negative index.
func (this *parser) delAttr(selection *goquery.Selection, attr string) {
	pos := this.indexOfAttribute(selection, attr)
	if pos < 0 {
		return
	}

	target := selection.Get(0)
	// Splice the entry out by appending the tail onto the head.
	target.Attr = append(target.Attr[:pos], target.Attr[pos+1:]...)
}

示例#6

显示文件

文件： parser.go 项目： hotei/GoOse

// removeNode detaches the first node of selection from its parent.
//
// It is a no-op when selection is nil or empty, or when the node is nil or
// has no parent.
func (this *parser) removeNode(selection *goquery.Selection) {
	// Get(0) indexes the node slice directly and panics on an empty
	// selection, so emptiness must be ruled out before calling it.
	if selection == nil || selection.Length() == 0 {
		return
	}

	node := selection.Get(0)
	if node == nil || node.Parent == nil {
		return
	}
	node.Parent.RemoveChild(node)
}

示例#7

显示文件

文件： extractor.go 项目： ngs/GoOse

// isNodescoreThresholdMet reports whether e scores well enough relative to
// node. The threshold is 8% of node's gravity score; when e falls below it,
// only a <td> element is still accepted.
func (this *contentExtractor) isNodescoreThresholdMet(node *goquery.Selection, e *goquery.Selection) bool {
	topNodeScore := this.getNodeGravityScore(node)
	currentNodeScore := this.getNodeGravityScore(e)

	minimum := float64(topNodeScore) * 0.08
	belowThreshold := float64(currentNodeScore) < minimum
	if !belowThreshold {
		return true
	}

	// The tag is looked at only once the score check has failed.
	return e.Get(0).DataAtom.String() == "td"
}

示例#8

显示文件

文件： parser.go 项目： hotei/GoOse

// indexOfAttribute returns the position of the attribute named attr on the
// first node of selection, or -1 when that node does not carry it.
//
// An empty selection has no node to inspect and also yields -1; without this
// guard Get(0) would panic on it.
func (this *parser) indexOfAttribute(selection *goquery.Selection, attr string) int {
	if selection.Length() == 0 {
		return -1
	}

	for i, a := range selection.Get(0).Attr {
		if a.Key == attr {
			return i
		}
	}
	return -1
}

示例#9

显示文件

文件： readability.go 项目： jpoehls/feedmailer

// removeNodes detaches the first node of every element in s from its parent
// node. Elements whose parent selection is empty are skipped.
func removeNodes(s *goquery.Selection) {
	s.Each(func(_ int, item *goquery.Selection) {
		parent := item.Parent()
		if parent.Length() == 0 {
			// Nothing to detach from; the original author left this case as
			// an open TODO.
			return
		}
		parent.Get(0).RemoveChild(item.Get(0))
	})
}

示例#10

显示文件

文件： videos.go 项目： hotei/GoOse

// getVideo builds a video value from node: its embed code, the tag name of
// its first node as the embed type, its dimensions, its source, and the
// provider derived from that source.
func (ve *VideoExtractor) getVideo(node *goquery.Selection) video {
	src := ve.getSrc(node)
	embedCode := ve.getEmbedCode(node)
	embedType := node.Get(0).DataAtom.String()
	width := ve.getWidth(node)
	height := ve.getHeight(node)

	return video{
		embedCode: embedCode,
		embedType: embedType,
		width:     width,
		height:    height,
		src:       src,
		provider:  ve.getProvider(src),
	}
}

示例#11

显示文件

文件： extractor.go 项目： ngs/GoOse

// isTableAndNoParaExist first removes every <p> under selection whose text is
// shorter than 25 bytes, then reports whether no <p> remains and the first
// node of selection is not a <td>.
func (this *contentExtractor) isTableAndNoParaExist(selection *goquery.Selection) bool {
	selection.Find("p").Each(func(_ int, para *goquery.Selection) {
		if len(para.Text()) >= 25 {
			return
		}
		short := para.Get(0)
		short.Parent.RemoveChild(short)
	})

	// Query again so the paragraphs removed above are no longer counted.
	remaining := selection.Find("p")
	return remaining.Length() == 0 && selection.Get(0).DataAtom.String() != "td"
}

示例#12

显示文件

文件： parser.go 项目： hotei/GoOse

// setAttr sets attribute attr to value on the first node of selection. Any
// existing attribute with that key is dropped and the new one is appended
// last. An empty selection is left untouched.
func (this *parser) setAttr(selection *goquery.Selection, attr string, value string) {
	if selection.Length() == 0 {
		return
	}

	target := selection.Get(0)
	kept := []html.Attribute{}
	for _, existing := range target.Attr {
		if existing.Key == attr {
			continue
		}
		// Only the key and value are carried over to the rebuilt list.
		kept = append(kept, html.Attribute{Key: existing.Key, Val: existing.Val})
	}
	target.Attr = append(kept, html.Attribute{Key: attr, Val: value})
}

示例#13

显示文件

文件： OldIndex.go 项目： captainju/indexof

// extractData resolves the value of the first attribute on the first node of
// tds against parsed_url and dispatches the resulting URL.
//
// URLs already present in visited_urls are ignored. A URL whose path contains
// no "." is recorded in visited_urls and handed to newSearch; any other URL is
// sent on result_chan. The function panics if the attribute value cannot be
// parsed as a URL.
func extractData(tds *goquery.Selection, parsed_url *url.URL, visited_urls map[string]string, result_chan chan string) {
	link := tds.Get(0).Attr[0].Val

	ref, err := url.Parse(link)
	if err != nil {
		panic(err)
	}

	resolved := parsed_url.ResolveReference(ref)
	target := resolved.String()
	if _, seen := visited_urls[target]; seen {
		return
	}

	if strings.Contains(resolved.Path, ".") {
		result_chan <- target
		return
	}

	visited_urls[target] = target
	newSearch(target, &visited_urls, result_chan)
}

示例#14

显示文件

文件： readability.go 项目： jpoehls/feedmailer

// cleanConditionally removes descendants of s matching selector that the
// heuristics below judge to be non-content. It does nothing unless
// d.CleanConditionally is set.
//
// A match is removed outright when its class weight plus its candidate
// content score is negative. Otherwise, if its text contains fewer than ten
// commas, it is removed when any case of the switch below applies.
func (d *Document) cleanConditionally(s *goquery.Selection, selector string) {
	if !d.CleanConditionally {
		return
	}

	s.Find(selector).Each(func(_ int, sel *goquery.Selection) {
		node := sel.Get(0)
		weight := float32(d.classWeight(sel))

		var contentScore float32
		if candidate, ok := d.candidates[node]; ok {
			contentScore = candidate.score
		}

		if weight+contentScore < 0 {
			removeNodes(sel)
			Logger.Printf("Conditionally cleaned %s%s with weight %f and content score %f\n", node.Data, getName(sel), weight, contentScore)
			return
		}

		text := sel.Text()
		if strings.Count(text, ",") >= 10 {
			return
		}

		count := func(tag string) int { return sel.Find(tag).Length() }
		// NOTE(review): the -100 offset on <li> presumably mirrors the
		// upstream readability heuristic — confirm before changing it.
		tally := struct{ p, img, li, a, embed, input int }{
			p:     count("p"),
			img:   count("img"),
			li:    count("li") - 100,
			a:     count("a"),
			embed: count("embed"),
			input: count("input"),
		}

		contentLength := len(strings.TrimSpace(text))
		linkDensity := d.getLinkDensity(sel)

		// The first matching case decides the removal reason; an empty reason
		// means the element is kept.
		var reason string
		switch {
		case tally.img > tally.p:
			reason = "too many images"
		case tally.li > tally.p && !sel.Is("ul,ol"):
			reason = "more <li>s than <p>s"
		case tally.input > tally.p/3:
			reason = "less than 3x <p>s than <input>s"
		case contentLength < d.MinTextLength && (tally.img == 0 || tally.img > 2):
			reason = "too short content length without a single image"
		case weight < 25 && linkDensity > 0.2:
			reason = fmt.Sprintf("too many links for its weight (%f)", weight)
		case weight >= 25 && linkDensity > 0.5:
			reason = fmt.Sprintf("too many links for its weight (%f)", weight)
		case (tally.embed == 1 && contentLength < 75) || tally.embed > 1:
			reason = "<embed>s with too short a content length, or too many <embed>s"
		}

		if reason == "" {
			return
		}
		Logger.Printf("Conditionally cleaned %s%s with weight %f and content score %f because it has %s\n", node.Data, getName(sel), weight, contentScore, reason)
		removeNodes(sel)
	})
}