Пример #1
0
	}
}

// boostFeatureWriter embeds featureWriter, so the embedded type's method set
// is promoted onto it and it adds no fields of its own.
// NOTE(review): whatever distinguishes it from a plain featureWriter is
// defined outside this excerpt — confirm against its methods.
type boostFeatureWriter struct{ featureWriter }

var (
	// goodQualClass is built by util.NewRegexFromWords from the words listed
	// below.
	// NOTE(review): judging by the name, it presumably matches class names that
	// hint at high-quality content — confirm against the code that uses it.
	goodQualClass = util.NewRegexFromWords(
		"article",
		"catchline",
		"chapter",
		"content",
		"head",
		"intro",
		"introduction",
		"leadin",
		"main",
		"post",
		"story",
		"summary",
		"title",
	)
	poorQualClass = util.NewRegexFromWords(
		"author",
		"blog",
		"byline",
		"caption",
		"col",
		"comment",
		"description",
Пример #2
0
			}
		}
	}
}

// ignoreNames is built by util.NewRegexFromWords from the words listed below.
// Some entries (e.g. "story[-_]?bar") carry regex syntax, so the helper
// presumably treats each word as a pattern fragment rather than a literal —
// TODO confirm. The order of the entries is kept as-is in case the helper is
// order-sensitive.
// NOTE(review): by its name it likely flags element names that should be
// skipped; verify at the call sites.
var ignoreNames = util.NewRegexFromWords(
	"breadcrumb",
	"byline",
	"caption",
	"comment",
	"community",
	"credit",
	"description",
	"email",
	"footer",
	"gallery",
	"hide",
	"infotext",
	"photo",
	"related",
	"shares",
	"social",
	"story[-_]?bar",
	"story[-_]?feature",
)

// ignoreStyle is built by util.NewRegex from a pattern that — assuming the
// helper uses Go regexp syntax — matches "display:" followed by optional
// whitespace and "none", ignoring case.
var ignoreStyle = util.NewRegex(`(?i)display:\s*none`)

// parseBody parses the <body>...</body> part of the HTML page. It creates
// Chunks for every html.TextNode found in the body.
func (doc *Document) parseBody(n *html.Node) {