Ejemplos de Selection.Text en Golang

Lenguaje de programación: Golang

Namespace/Package Name: github.com/PuerkitoBio/goquery

Clase / Tipo: Selection

Método / Función: Text

Ejemplos en hotexamples.com: 30

La función `Text` de `github.com/PuerkitoBio/goquery.Selection` en Golang devuelve el texto combinado de todos los elementos seleccionados, incluidos sus descendientes. Este método es útil para extraer y manipular información de un documento HTML utilizando selectores CSS.

Golang Selection.Text - 30 ejemplos encontrados. Estos son los ejemplos en Golang del mundo real mejor valorados de github.com/PuerkitoBio/goquery.Selection.Text extraídos de proyectos de código abierto. Puedes valorar ejemplos para ayudarnos a mejorar la calidad de los ejemplos.

Métodos usados con frecuencia

Mostrar Ocultar

Attr(30)

Text(30)

Find(30)

Get(14)

Each(14)

Length(9)

Children(9)

Html(8)

Parent(6)

Is(4)

Nodes(4)

First(4)

FindMatcher(4)

Prev(4)

Eq(3)

Map(3)

ReplaceWithHtml(3)

Contents(3)

Closest(3)

SetAttr(3)

Size(3)

Union(2)

HasClass(2)

EachWithBreak(2)

Next(2)

Clone(2)

AttrOr(2)

AppendSelection(1)

Not(1)

FilterMatcher(1)

Parents(1)

Empty(1)

Remove(1)

RemoveFiltered(1)

Siblings(1)

ChildrenFiltered(1)

AppendNodes(1)

Ejemplo n.º 1

Mostrar archivo

Archivo: wikipediamoscow.go Proyecto: Jenyay/streetlist

// processLink resolves a single street link and sends exactly one *StreetInfo
// on done.
//
// An empty placeholder (parser.getEmptyInfo) is sent when the link has no
// text or no href, when its class attribute is "new", when the HTTP request
// fails, or when the fetched page cannot be parsed. Otherwise the page at
// parser.baseURL + href is downloaded and parsed with a WikipediaStreetParser.
func (parser *WikipediaMoscow) processLink(_ int, s *goquery.Selection, done chan<- *StreetInfo) {
	name := strings.TrimSpace(s.Text())
	if name == "" {
		done <- parser.getEmptyInfo("")
		return
	}

	href, exists := s.Attr("href")
	if !exists {
		done <- parser.getEmptyInfo("")
		return
	}

	// NOTE(review): class "new" presumably marks a link to an article that
	// does not exist yet, so there is nothing to download — confirm.
	if class, ok := s.Attr("class"); ok && class == "new" {
		done <- parser.getEmptyInfo(name)
		return
	}

	resp, err := http.Get(parser.baseURL + href)
	if err != nil {
		done <- parser.getEmptyInfo(name)
		return
	}

	var info *StreetInfo
	info, err = NewWikipediaStreetParser().ParseStreetInfo(name, resp.Body)
	// Release the connection before the channel send, which may block for an
	// arbitrary time. The close error carries no useful information here.
	resp.Body.Close()
	if err != nil {
		info = parser.getEmptyInfo(name)
	}
	done <- info
}

Ejemplo n.º 2

Mostrar archivo

Archivo: wiki.go Proyecto: roth1002/apex

// node renders the first node of s as terminal text. Headings and paragraphs
// are wrapped in ANSI colour escapes, preformatted/highlighted blocks and
// inline code are emitted in bold, links are followed by their href, and
// list items are prefixed with a bullet. Bare text nodes are returned
// trimmed; anything else yields the empty string.
//
// The checks run in a fixed order, so an element carrying the "highlight"
// class is rendered as a code block even when it is an <a>, <li>, <ul> or
// <code> element.
func node(i int, s *goquery.Selection) string {
	n := s.Get(0)
	tag := n.Data

	if tag == "h1" {
		return fmt.Sprintf(" \033[%dm# %s\033[0m\n\n", blue, text(s))
	}
	if tag == "h2" {
		return fmt.Sprintf(" \033[%dm## %s\033[0m\n\n", blue, text(s))
	}
	if tag == "h3" {
		return fmt.Sprintf(" \033[%dm### %s\033[0m\n\n", blue, text(s))
	}
	if tag == "p" {
		return fmt.Sprintf("\033[%dm%s\033[0m\n\n", none, indent(text(s), 1))
	}
	if tag == "pre" || s.HasClass("highlight") {
		return fmt.Sprintf("\033[1m%s\033[0m\n\n", indent(text(s), 2))
	}
	if tag == "a" {
		return fmt.Sprintf("%s (%s) ", s.Text(), s.AttrOr("href", "missing link"))
	}
	if tag == "li" {
		return fmt.Sprintf("  • %s\n", contents(s))
	}
	if tag == "ul" {
		return fmt.Sprintf("%s\n", nodes(s))
	}
	if tag == "code" {
		return fmt.Sprintf("\033[1m%s\033[0m ", s.Text())
	}
	if n.Type == html.TextNode {
		return strings.TrimSpace(n.Data)
	}
	return ""
}

Ejemplo n.º 3

Mostrar archivo

Archivo: scrape.go Proyecto: squat/drae

// ScrapeExamples collects the text of every "span.h" element found beneath s,
// in document order. The result is always a non-nil slice, even when nothing
// matches.
func ScrapeExamples(s *goquery.Selection) []string {
	found := make([]string, 0)
	collect := func(_ int, span *goquery.Selection) {
		found = append(found, span.Text())
	}
	s.Find("span.h").Each(collect)
	return found
}

Ejemplo n.º 4

Mostrar archivo

Archivo: scrape.go Proyecto: squat/drae

// JoinNodesWithSpace returns the text of each element in s, joined with a
// single space between consecutive elements.
func JoinNodesWithSpace(s *goquery.Selection) string {
	parts := make([]string, 0, s.Length())
	s.Each(func(_ int, item *goquery.Selection) {
		parts = append(parts, item.Text())
	})
	joined := strings.Join(parts, " ")
	return joined
}

Ejemplo n.º 5

Mostrar archivo

Archivo: crawl_products.go Proyecto: josephmisiti/mac-crawler

// parseColors returns the text of every element in s concatenated without a
// separator.
//
// Selection.Text already yields the combined text of all matched elements in
// order, so it replaces the per-element loop that rebuilt the string with
// repeated "+=" (quadratic in the number of elements).
func parseColors(s *goquery.Selection) string {
	return s.Text()
}

Ejemplo n.º 6

Mostrar archivo

Archivo: extractor.go Proyecto: ngs/GoOse

// getSiblingsContent gathers paragraph content from a sibling of the top node.
//
// When currentSibling is itself a non-empty <p>, it is returned as the only
// element. Otherwise every <p> beneath it is scored by stop-word count; a
// paragraph is kept when that count exceeds 0.30 * baselinescoreSiblingsPara
// and the paragraph is not link-dense. Kept paragraphs are returned as fresh
// single-node selections holding a text node with the paragraph's text.
// The returned slice is never nil.
func (this *contentExtractor) getSiblingsContent(currentSibling *goquery.Selection, baselinescoreSiblingsPara float64) []*goquery.Selection {
	ps := make([]*goquery.Selection, 0)

	if currentSibling.Get(0).DataAtom.String() == "p" && len(currentSibling.Text()) > 0 {
		return append(ps, currentSibling)
	}

	const siblingBaselineScore = 0.30
	threshold := siblingBaselineScore * baselinescoreSiblingsPara

	currentSibling.Find("p").Each(func(_ int, para *goquery.Selection) {
		text := para.Text()
		if len(text) == 0 {
			return
		}

		ws := this.config.stopWords.stopWordsCount(this.config.targetLanguage, text)
		highLinkDensity := this.isHighLinkDensity(para)
		if threshold < float64(ws.stopWordCount) && !highLinkDensity {
			textNode := &html.Node{
				Type:     html.TextNode,
				Data:     text,
				DataAtom: atom.P,
			}
			ps = append(ps, &goquery.Selection{Nodes: []*html.Node{textNode}})
		}
	})

	return ps
}

Ejemplo n.º 7

Mostrar archivo

Archivo: proof_support_twitter.go Proyecto: polluks/client

// findSigInTweet checks whether the tweet text in s starts with the expected
// signature text from h, after stripping any leading "@mention " prefixes.
//
// It returns nil on a match, a CONTENT_FAILURE proof error when the tweet's
// HTML cannot be rendered, and a DELETED proof error when the remaining text
// does not start with the signature.
func (rc *TwitterChecker) findSigInTweet(h SigHint, s *goquery.Selection) ProofError {
	inside := s.Text()
	markup, err := s.Html()
	if err != nil {
		return NewProofError(keybase1.ProofStatus_CONTENT_FAILURE, "No HTML tweet found: %s", err)
	}

	checkText := h.checkText

	G.Log.Debug("+ Checking tweet '%s' for signature '%s'", inside, checkText)
	G.Log.Debug("| HTML is: %s", markup)

	// Peel off leading @mentions one at a time; each match consumes the
	// mention and the whitespace that follows it.
	rxx := regexp.MustCompile(`^(@[a-zA-Z0-9_-]+\s+)`)
	for m := rxx.FindStringSubmatchIndex(inside); m != nil; m = rxx.FindStringSubmatchIndex(inside) {
		prefix := inside[m[2]:m[3]]
		inside = inside[m[3]:]
		G.Log.Debug("| Stripping off @prefx: %s", prefix)
	}

	if !strings.HasPrefix(inside, checkText) {
		return NewProofError(keybase1.ProofStatus_DELETED, "Could not find '%s' in '%s'",
			checkText, inside)
	}
	return nil
}

Ejemplo n.º 8

Mostrar archivo

Archivo: extractor.go Proyecto: ngs/GoOse

// isBoostable reports whether node is followed closely by a paragraph with
// real content.
//
// The first paragraph is often just a caption under an image, so before
// boosting a parent node we make sure it is connected to other paragraphs:
// one of the following siblings must be a <p> with substantial weight (more
// than 5 stop words). If the first following <p> we reach is 3 or more steps
// away, the node is not boosted.
//
// The walk inspects each following sibling in turn. Previously the loop
// advanced `next` but tested `node` itself on every pass, so the siblings
// were never examined. The walk stops once the siblings are exhausted;
// Selection.Next never returns nil, so emptiness is tested with Length.
func (this *contentExtractor) isBoostable(node *goquery.Selection) bool {
	// The sibling count does not change during the walk; compute it once.
	siblingCount := node.Siblings().Length()

	stepsAway := 0
	for next := node.Next(); next.Length() > 0 && stepsAway < siblingCount; next = next.Next() {
		if next.Get(0).DataAtom.String() == "p" {
			if stepsAway >= 3 {
				if this.config.debug {
					log.Println("Next paragraph is too far away, not boosting")
				}
				return false
			}

			paraText := next.Text()
			ws := this.config.stopWords.stopWordsCount(this.config.targetLanguage, paraText)
			if ws.stopWordCount > 5 {
				if this.config.debug {
					log.Println("We're gonna boost this node, seems content")
				}
				return true
			}
		}
		stepsAway++
	}

	return false
}

Ejemplo n.º 9

Mostrar archivo

Archivo: parsetoys.go Proyecto: ruslanfirefly/parsetoys

// getDataFromDOM extracts one value from s and passes it through
// encode_string with the given code. arr[0] selects the source: the literal
// "text" means the selection's text content, any other value is treated as
// an attribute name (a missing attribute yields the empty string).
func getDataFromDOM(s *gq.Selection, arr []string, code string) string {
	if arr[0] == "text" {
		return encode_string(s.Text(), code)
	}
	value, _ := s.Attr(arr[0])
	return encode_string(value, code)
}

Ejemplo n.º 10

Mostrar archivo

Archivo: extract_content.go Proyecto: oudommeas/swan

// noParasWithoutTable removes every paragraph under s (as matched by pTags)
// whose text is shorter than 25 bytes, then reports whether no paragraphs are
// left and the first node of s is not a <td>.
func (e extractContent) noParasWithoutTable(s *goquery.Selection) bool {
	paragraphs := s.FindMatcher(pTags)
	paragraphs.Each(func(_ int, p *goquery.Selection) {
		if len(p.Text()) >= 25 {
			return
		}
		p.Remove()
	})

	// Re-query: the removals above changed the tree.
	remaining := s.FindMatcher(pTags).Length()
	return remaining == 0 && !nodeIs(s.Nodes[0], atom.Td)
}

Ejemplo n.º 11

Mostrar archivo

Archivo: readability.go Proyecto: jpoehls/feedmailer

// getLinkDensity returns the ratio of the byte length of all <a> text under s
// to the byte length of all text in s. A selection with no text has a
// density of 0.
func (d *Document) getLinkDensity(s *goquery.Selection) float32 {
	total := len(s.Text())
	if total == 0 {
		return 0
	}

	linked := len(s.Find("a").Text())
	return float32(linked) / float32(total)
}

Ejemplo n.º 12

Mostrar archivo

Archivo: parse.go Proyecto: sellweek/vlaky

// parseHeader fills info from the <span> elements under element: the first
// span supplies the train's category, number and name; the third span
// supplies the route endpoints. Spans that are not present leave the
// corresponding fields untouched.
func parseHeader(element *goquery.Selection, info *TrainInfo) {
	spans := element.Find("span")
	count := spans.Length()

	if count > 0 {
		info.Category, info.Number, info.Name = parseTrainDenomination(spans.Eq(0).Text())
	}
	if count > 2 {
		info.From, info.To = parseTrainRoute(spans.Eq(2).Text())
	}
}

Ejemplo n.º 13

Mostrar archivo

Archivo: dealer.go Proyecto: mushfiq/dealer

// displayDetails prints a link when its trimmed text is longer than five
// runes, either the text or the href passes wordExists for "keywords", and
// the text passes wordExists for "products". The printed text is the
// untrimmed text of the selection.
func displayDetails(single *goquery.Selection) {
	text := strings.TrimSpace(single.Text())
	href, _ := single.Attr("href")

	if utf8.RuneCountInString(text) <= 5 {
		return
	}
	if !wordExists(text, "keywords") && !wordExists(href, "keywords") {
		return
	}
	if !wordExists(text, "products") {
		return
	}

	fmt.Println("Link", single.Text(), "--->", href)
}

Ejemplo n.º 14

Mostrar archivo

Archivo: resources.go Proyecto: hojgr/travian

// parseResource reads one resource counter from s.
//
// The production value comes from the "title" attribute; the element text is
// expected to look like "stored/capacity". Parsing is best-effort: any value
// that is missing or not a valid integer is reported as 0. In particular,
// text without a "/" separator yields a capacity of 0 rather than panicking
// on an out-of-range index.
func parseResource(s *goquery.Selection) (_production, _stored, _capacity int) {
	// Conversion errors are deliberately ignored; Atoi returns 0 on failure.
	productionStr, _ := s.Attr("title")
	production, _ := strconv.Atoi(productionStr)

	// Split always returns at least one element for a non-empty separator.
	parts := strings.Split(s.Text(), "/")
	stored, _ := strconv.Atoi(parts[0])

	capacity := 0
	if len(parts) > 1 {
		capacity, _ = strconv.Atoi(parts[1])
	}

	return production, stored, capacity
}

Ejemplo n.º 15

Mostrar archivo

Archivo: category.go Proyecto: uronce-cc/weat

// addIngredient appends the ingredient described by link a to ingredients and
// returns the resulting slice.
//
// The ingredient id is the third "/"-separated segment of the href, and the
// name is the link text. Links without an href are skipped silently; hrefs
// with fewer than three segments or a non-numeric id are logged and skipped
// (previously a short href caused an index-out-of-range panic).
func addIngredient(ingredients []data.Ingredient, a *goquery.Selection) []data.Ingredient {
	href, ok := a.Attr("href")
	if !ok {
		return ingredients
	}
	glog.V(2).Info("    href: " + href)

	segments := strings.Split(href, "/")
	if len(segments) < 3 {
		glog.Errorf("Failed to extract id from %s: unexpected path format", href)
		return ingredients
	}

	id, err := strconv.Atoi(segments[2])
	if err != nil {
		glog.Errorf("Failed to extract id from %s: %v", href, err)
		return ingredients
	}

	return append(ingredients, data.Ingredient{Name: a.Text(), Id: id})
}

Ejemplo n.º 16

Mostrar archivo

Archivo: textextract.go Proyecto: Gelembjuk/articletext

// describeSentences tokenizes the text of s into sentences and returns
// counters describing them.
//
// The text comes from getTextFromHtml when s has child elements, otherwise
// from s.Text(). For each non-empty sentence the word count is accumulated;
// sentences with more than 3 words count as "long", long sentences with
// fewer than 25 words also count as "good", and long sentences ending in
// '.', '?' or '!' count as "correct". AverageWords ends up as the total word
// count divided by the number of sentences (including empty ones).
func describeSentences(s *goquery.Selection) TextDescription {
	var d TextDescription

	// Pick the text source depending on whether the node has child elements.
	var text string
	if s.Children().Length() > 0 {
		text = getTextFromHtml(s)
	} else {
		text = s.Text()
	}

	sentences := tokenizer.Tokenize(text)
	d.CountSentences = len(sentences)

	for _, tok := range sentences {
		sentence := tok.Text
		if len(sentence) == 0 {
			continue
		}

		words := len(get_words_from(sentence))
		d.AverageWords += words

		// Sentences of three words or fewer only contribute to the average.
		if words <= 3 {
			continue
		}
		d.CountLongSentences++

		// Overly long sentences (25+ words) are not considered good ones.
		if words < 25 {
			d.CountGoodSentences++
		}

		if strings.ContainsAny(sentence[len(sentence)-1:], ".?!") {
			d.CountCorrectSentences++
		}
	}

	if d.CountSentences > 0 {
		d.AverageWords = int(d.AverageWords / d.CountSentences)
	}

	return d
}

Ejemplo n.º 17

Mostrar archivo

Archivo: extractor.go Proyecto: ngs/GoOse

// isHighLinkDensity reports whether node consists mostly of link text.
//
// The score is (space-separated words inside <a> elements / space-separated
// words in the whole node) multiplied by the number of links; a score above
// 1.0 counts as high density. A node without links is never high density.
// In debug mode the score is logged with the first 50 bytes of the node
// text.
func (this *contentExtractor) isHighLinkDensity(node *goquery.Selection) bool {
	links := node.Find("a")
	if links == nil || links.Size() == 0 {
		return false
	}

	text := node.Text()
	nwords := len(strings.Split(text, " "))

	// Concatenate the text of every link without a separator.
	var linkText strings.Builder
	links.Each(func(_ int, link *goquery.Selection) {
		linkText.WriteString(link.Text())
	})
	nlinkWords := len(strings.Split(linkText.String(), " "))

	linkDivisor := float64(nlinkWords) / float64(nwords)
	score := linkDivisor * float64(links.Size())

	if this.config.debug {
		logText := text
		if len(text) >= 51 {
			logText = text[0:50]
		}
		log.Printf("Calculated link density score as %1.5f for node %s\n", score, logText)
	}

	return score > 1.0
}

Ejemplo n.º 18

Mostrar archivo

Archivo: common.go Proyecto: huih/webspider

// IsAllCode reports whether bodyContent is essentially nothing but code: it
// removes every <pre> element from the selection's tree and returns true
// when the remaining trimmed text is shorter than 100 bytes.
//
// Note that the <pre> elements are removed from the document in place.
func IsAllCode(bodyContent *goquery.Selection) bool {
	if pres := bodyContent.Find("pre"); pres != nil {
		pres.Each(func(_ int, pre *goquery.Selection) {
			pre.Remove()
		})
	}

	remaining := strings.TrimSpace(bodyContent.Text())
	return len(remaining) < 100
}

Ejemplo n.º 19

Mostrar archivo

Archivo: goquery_helper.go Proyecto: speedland/service

// getText returns the trimmed text of s. An empty selection yields "".
// With includeDecendents set, the text of all descendants is included;
// otherwise only the text nodes that are direct children of the first
// element in s are concatenated.
func getText(s *goquery.Selection, includeDecendents bool) string {
	switch {
	case s.Length() == 0:
		return ""
	case includeDecendents:
		return strings.TrimSpace(s.Text())
	}

	var direct strings.Builder
	for child := s.Nodes[0].FirstChild; child != nil; child = child.NextSibling {
		if child.Type != html.TextNode {
			continue
		}
		direct.WriteString(child.Data)
	}
	return strings.TrimSpace(direct.String())
}

Ejemplo n.º 20

Mostrar archivo

Archivo: footnote.go Proyecto: twnanda/twnanda

// ProcessSelection advances the state machine with the next selection.
// Text starting with "備註" switches the machine into the footnote state and
// text starting with "資訊更新日期" switches it out again. While in the
// footnote state, the selection's single node is handed to ProcessNode; a
// selection that does not hold exactly one node causes a panic.
func (s *StateMachine) ProcessSelection(sl *goquery.Selection) {
	text := sl.Text()

	if strings.HasPrefix(text, "備註") {
		s.State = InFootnote
	}
	if strings.HasPrefix(text, "資訊更新日期") {
		s.State = NotInFootnote
	}

	if s.State != InFootnote {
		return
	}
	if sl.Size() != 1 {
		panic("element size is not 1")
	}
	s.ProcessNode(sl.Nodes[0])
}

Ejemplo n.º 21

Mostrar archivo

Archivo: parse_bible_html.go Proyecto: read-him/bible-parser

// getLineNumber parses the text of sel, expected in the form "chapter:line",
// and returns the chapter number and the line number of the sentence.
//
// Any malformed input terminates the program via log.Fatal, matching the
// existing handling of non-numeric parts. Text without a ":" separator is now
// reported this way as well, instead of crashing with an index-out-of-range
// panic.
func getLineNumber(sel *goquery.Selection) (chapterNumber int, lineNumber int) {
	rawtext := sel.Text()
	textArray := strings.Split(rawtext, ":")
	if len(textArray) < 2 {
		log.Fatalf("malformed line reference %q: expected \"chapter:line\"", rawtext)
	}

	var err error

	chapterNumber, err = strconv.Atoi(textArray[0])
	if err != nil {
		log.Fatal(err)
	}

	lineNumber, err = strconv.Atoi(textArray[1])
	if err != nil {
		log.Fatal(err)
	}

	return chapterNumber, lineNumber
}

Ejemplo n.º 22

Mostrar archivo

Archivo: GeneratorIndex.go Proyecto: qhsong/DoxygenToDash

// addEntryType records every link found under ".memItemRight" in s as a
// search-index row of the given type. The link text (trimmed of CR, LF and
// spaces) becomes the entry name and the trimmed href becomes its path;
// links with an empty name or path are skipped.
//
// The row is inserted with bound parameters rather than by splicing scraped
// text into the SQL string, so names containing quotes can neither break the
// statement nor inject SQL. A failed insert terminates the program via
// log.Fatal, now including the underlying error.
//
// NOTE(review): assumes db is a database/sql handle whose driver accepts "?"
// placeholders (the "insert or ignore" syntax suggests SQLite) — confirm.
func addEntryType(typename string, s *goquery.Selection) {
	const insertSQL = "insert or ignore into searchIndex(name,type,path) VALUES(?,?,?)"

	s.Find(".memItemRight").Find("a").Each(func(_ int, a *goquery.Selection) {
		structName := strings.Trim(a.Text(), "\r\n ")
		link, _ := a.Attr("href") // a missing href yields "", which is skipped below
		link = strings.Trim(link, "\r\n ")
		if len(structName) == 0 || len(link) == 0 {
			return
		}

		if _, err := db.Exec(insertSQL, structName, typename, link); err != nil {
			log.Fatal("Insert " + typename + " " + structName + "Failed!: " + err.Error())
		}
		log.Print("Insert " + typename + structName)
	})
}

Ejemplo n.º 23

Mostrar archivo

Archivo: main.go Proyecto: PeerRails/rgleaks-go

// ScrapeFileLink builds an Images record from the link in s (name from the
// link text, timestamps set to now). When the record is an image type, its
// source URL is derived from the href, the image is downloaded and, on
// success, the record is marked as uploaded and inserted. An insert failure
// terminates the program via log.Fatal.
func ScrapeFileLink(s *goquery.Selection) {
	img := Images{Created_at: time.Now(), Updated_at: time.Now()}
	img.Name = s.Text()
	href, _ := s.Attr("href")

	if !img.IsImageType() {
		return
	}

	img.Source = fmt.Sprintf("http://rghost.ru%s/image.png", href)
	if !img.DownloadImage() {
		return
	}

	img.Uploaded_to = "yes"
	img.Archived = false
	if err := img.InsertImage(); err != nil {
		log.Fatal(err)
	}
}

Ejemplo n.º 24

Mostrar archivo

Archivo: ulli2rst.go Proyecto: siongui/siongui.github.io

// processUl flattens the list ul into plain text, in place, replacing the
// list's markup with marked and indented lines.
//
// depth is the nesting level of ul; it selects the list marker via
// liMark[depth%2], so markers alternate between nesting levels.
//
// The statement order matters: nested lists are flattened before the text of
// their parent <li> is read, and the <ul> itself is replaced last.
func processUl(ul *goquery.Selection, depth int) {
	ul.Find("li").Each(func(_ int, li *goquery.Selection) {
		// Flatten nested lists first so that li.Text() below already sees
		// their replacement text.
		li.Find("ul").Each(func(_ int, childUl *goquery.Selection) {
			processUl(childUl, depth+1)
		})

		// NOTE(review): StringToLines presumably splits the text on line
		// breaks — confirm against its definition.
		lines := StringToLines(li.Text())
		var indentedLines []string
		for i, line := range lines {
			if i == 0 {
				// First line of the item: blank line, marker, then the text.
				liMarkIndex := depth % 2
				mark := liMark[liMarkIndex]
				indentedLines = append(indentedLines, "\n"+mark+" "+line)
			} else {
				// Continuation lines are indented by two spaces.
				indentedLines = append(indentedLines, "  "+line)
			}
		}
		// Replace the <li> element with its rendered lines.
		li.ReplaceWithHtml(strings.Join(indentedLines, "\n"))
	})

	// With every item rewritten, replace the <ul> itself by its text content.
	ul.ReplaceWithHtml(ul.Text())
}

Ejemplo n.º 25

Mostrar archivo

Archivo: extract_content.go Proyecto: oudommeas/swan

// getSiblingContent returns the paragraph nodes worth keeping from sibling s.
//
// When the first node of s is a non-empty <p>, the nodes of s are returned
// as they are. Otherwise each paragraph under s (as matched by pTags) is
// looked up in the article's content cache and kept, as a newly created <p>
// node holding the cached text, when it has text, more stop words than
// baseScore, and no high link density.
func (e extractContent) getSiblingContent(
	a *Article,
	s *goquery.Selection,
	baseScore uint) []*html.Node {

	if nodeIs(s.Nodes[0], atom.P) && len(s.Text()) > 0 {
		return s.Nodes
	}

	var ret []*html.Node
	for _, n := range s.FindMatcher(pTags).Nodes {
		cc := a.getCCache(n)
		if len(cc.text) == 0 || cc.stopwords <= baseScore || cc.highLinkDensity {
			continue
		}
		ret = append(ret, createNode(atom.P, "p", cc.text))
	}

	return ret
}

Ejemplo n.º 26

Mostrar archivo

Archivo: html2tex.go Proyecto: frankMilde/rol

// printSelectionTextWithTitle passes the text of sel to Debug, prefixed with
// the given title.
func printSelectionTextWithTitle(title string, sel *goquery.Selection) {
	text := sel.Text()
	Debug("%v selection: %v", title, text)
}

Ejemplo n.º 27

Mostrar archivo

Archivo: html2tex.go Proyecto: frankMilde/rol

// printSelectionText passes the text of sel to Debug.
func printSelectionText(sel *goquery.Selection) {
	text := sel.Text()
	Debug("selection: %v", text)
}

Ejemplo n.º 28

Mostrar archivo

Archivo: functions.go Proyecto: s-oram/malkovich-wiki

// UpdatePageSummary renders the page docName at PageFilePath to HTML,
// extracts a short summary from it, and writes that summary as JSON to
// PreviewFilePath.
//
// The summary holds:
//   - PageTitle: the trimmed text of the first h1–h4 heading, or docName
//     when there is no heading or it is empty, passed through
//     malkovich.FolderNameToDocName;
//   - FirstImage: the src attribute of the first <img>, or "";
//   - FirstParagraph: the trimmed text of the first <p>, or "".
//
// Any failure — rendering, parsing, marshalling or writing the preview
// file — is returned as an error. A write failure used to panic, unlike
// every other failure path in this function.
func UpdatePageSummary(docName, PageFilePath, PreviewFilePath string) error {
	var pageSummaryData PageSummaryData

	html, err := readPageAsHtml(docName, PageFilePath)
	if err != nil {
		return err
	}
	htmlreader := bytes.NewReader(html)

	doc, err := gq.NewDocumentFromReader(htmlreader)
	if err != nil {
		return err
	}

	var SelectedNodes *gq.Selection

	//======= work out the document heading ========
	DocTitle := ""
	SelectedNodes = doc.Find("h1, h2, h3, h4").First()
	if len(SelectedNodes.Nodes) == 1 {
		DocTitle = strings.TrimSpace(SelectedNodes.Text())
	}
	// Fall back to the document name when no usable heading exists.
	if DocTitle == "" {
		DocTitle = docName
	}
	DocTitle = malkovich.FolderNameToDocName(DocTitle)
	pageSummaryData.PageTitle = DocTitle

	//======== look for an image =========
	DocImage := ""
	SelectedNodes = doc.Find("img").First()
	if len(SelectedNodes.Nodes) == 1 {
		for _, nodeAttr := range SelectedNodes.Nodes[0].Attr {
			if nodeAttr.Key == "src" {
				DocImage = nodeAttr.Val
				break
			}
		}
	}
	pageSummaryData.FirstImage = DocImage

	//======== look for the first paragraph =========
	FirstParagraph := ""
	SelectedNodes = doc.Find("p").First()
	if len(SelectedNodes.Nodes) == 1 {
		FirstParagraph = strings.TrimSpace(SelectedNodes.Text())
	}
	//TODO:HIGH Maybe limit to a set number of characters here.
	pageSummaryData.FirstParagraph = FirstParagraph

	jsonData, err := json.Marshal(pageSummaryData)
	if err != nil {
		return err
	}

	// TODO:MED what would the best file permissions be here?
	if err := ioutil.WriteFile(PreviewFilePath, jsonData, os.FileMode(0644)); err != nil {
		return err
	}

	return nil
}

Ejemplo n.º 29

Mostrar archivo

Archivo: parse_bible_html.go Proyecto: read-him/bible-parser

// getPureContent returns the text content of sel unchanged.
func getPureContent(sel *goquery.Selection) string {
	return sel.Text()
}

Ejemplo n.º 30

Mostrar archivo

Archivo: videos.go Proyecto: hotei/GoOse

// getEmbedCode returns the text content of node, which is used as the
// video's embed code.
func (ve *VideoExtractor) getEmbedCode(node *goquery.Selection) string {
	embedCode := node.Text()
	return embedCode
}