//a lot of times the first paragraph might be the caption under an image so we'll want to make sure if we're going to //boost a parent node that it should be connected to other paragraphs, at least for the first n paragraphs //so we'll want to make sure that the next sibling is a paragraph and has at least some substatial weight to it func (this *contentExtractor) isBoostable(node *goquery.Selection) bool { stepsAway := 0 next := node.Next() for next != nil && stepsAway < node.Siblings().Length() { currentNodeTag := node.Get(0).DataAtom.String() if currentNodeTag == "p" { if stepsAway >= 3 { if this.config.debug { log.Println("Next paragraph is too far away, not boosting") } return false } paraText := node.Text() ws := this.config.stopWords.stopWordsCount(this.config.targetLanguage, paraText) if ws.stopWordCount > 5 { if this.config.debug { log.Println("We're gonna boost this node, seems content") } return true } } stepsAway++ next = next.Next() } return false }