} }

// boostFeatureWriter is a struct whose only field is an embedded
// featureWriter, so featureWriter's fields and methods are promoted onto it.
// NOTE(review): featureWriter is declared outside this view; which of its
// methods boostFeatureWriter overrides (if any) cannot be seen from here.
type boostFeatureWriter struct {
	featureWriter
}

var (
	// goodQualClass is built by util.NewRegexFromWords from the fixed word
	// list below. Presumably it is matched against class names that hint at
	// main article content — confirm against the callers.
	goodQualClass = util.NewRegexFromWords(
		"article",
		"catchline",
		"chapter",
		"content",
		"head",
		"intro",
		"introduction",
		"leadin",
		"main",
		"post",
		"story",
		"summary",
		"title",
	)

	// poorQualClass is built the same way from a second word list.
	// NOTE(review): the list continues past the end of this view; presumably
	// it is the counterpart of goodQualClass for low-value content — confirm.
	poorQualClass = util.NewRegexFromWords(
		"author",
		"blog",
		"byline",
		"caption",
		"col",
		"comment",
		"description",
} } } }

var (
	// ignoreNames is built by util.NewRegexFromWords from the fixed list
	// below. Most entries are plain words; the last two are small patterns
	// ("story[-_]?bar", "story[-_]?feature") that allow an optional "-" or "_"
	// separator, assuming the helper passes them through as regex syntax.
	// Presumably matched against element id/class names whose content should
	// be skipped — confirm against the callers.
	ignoreNames = util.NewRegexFromWords(
		"breadcrumb",
		"byline",
		"caption",
		"comment",
		"community",
		"credit",
		"description",
		"email",
		"footer",
		"gallery",
		"hide",
		"infotext",
		"photo",
		"related",
		"shares",
		"social",
		"story[-_]?bar",
		"story[-_]?feature",
	)

	// ignoreStyle is built by util.NewRegex from a pattern that, in Go regexp
	// syntax, matches "display:" followed by optional whitespace and "none",
	// case-insensitively. Presumably applied to inline style attributes to
	// detect hidden elements — confirm against the callers.
	ignoreStyle = util.NewRegex(`(?i)display:\s*none`)
)

// parseBody parses the <body>...</body> part of the HTML page. It creates // Chunks for every html.TextNode found in the body. func (doc *Document) parseBody(n *html.Node) {