Exemple #1
0
// buzzfeedPublishedPat captures the quoted value of a `published: "..."`
// field embedded in inline javascript, e.g.
//
//	var buzzDetails = {..., published: "2015-02-17 17:57:12", ...};
//
// It is compiled once at package scope rather than on every call.
var buzzfeedPublishedPat = regexp.MustCompile(`published:\s+"(.*?)"`)

// EVIL SPECIALCASE HACK ALERT
//
// checkEvilSpecialCaseHacks applies site-specific workarounds for finding a
// publication date. Currently the only special case is www.buzzfeed.com,
// where the date is pulled out of the text of the given script nodes.
//
// For any other host, or if no script node contains a match, the zero
// fuzzytime.DateTime is returned. Only the first matching script node is
// considered; whatever fuzzytime.Extract yields for it is returned, and the
// other values returned by Extract are discarded.
func checkEvilSpecialCaseHacks(artURL *url.URL, scriptNodes []*html.Node) fuzzytime.DateTime {
	published := fuzzytime.DateTime{}

	if artURL.Host != "www.buzzfeed.com" {
		return published
	}

	dbug := Debug.DatesLogger
	dbug.Printf("specialcase buzzfeeed check")

	for _, el := range scriptNodes {
		txt := getTextContent(el)
		dbug.Printf("----\n%s\n----", txt)

		m := buzzfeedPublishedPat.FindStringSubmatch(txt)
		if m == nil {
			continue
		}
		// Best effort: stop at the first match, whether or not the
		// captured text turns out to be a usable date.
		published, _, _ = fuzzytime.Extract(m[1])
		break
	}

	return published
}
Exemple #2
0
// getTimeFromHeader parses a date string of unknown format.
//
// The string is first run through fuzzytime.Extract; if that reports an
// error, the zero time.Time and that error are returned. Otherwise the
// extracted value is rendered with ISOFormat and parsed by time.Parse, using
// ISO_FORMAT when the extracted time has a seconds component and
// ISO_FORMAT_NO_SECONDS when it does not.
func getTimeFromHeader(date string) (time.Time, error) {
	extracted, _, err := fuzzytime.Extract(date)
	if err != nil {
		return time.Time{}, err
	}

	// Pick the layout that matches the precision of the extracted time.
	layout := ISO_FORMAT_NO_SECONDS
	if extracted.Time.HasSecond() {
		layout = ISO_FORMAT
	}

	return time.Parse(layout, extracted.ISOFormat())
}
Exemple #3
0
// datesFromMeta looks for timestamps in <meta> tags beneath root.
//
// It returns (published, updated). For each of the two, the nodes matched by
// the corresponding selector in dateSels are visited in order and their
// "content" attribute is passed to fuzzytime.Extract. The scan stops at the
// first value that has a full date; if none does, the result of the last
// node examined is returned (or the zero value when nothing matched).
// Anything else returned by Extract is ignored.
func datesFromMeta(root *html.Node) (fuzzytime.DateTime, fuzzytime.DateTime) {
	var published, updated fuzzytime.DateTime

	// Published timestamp candidates.
	for _, meta := range dateSels.metaPublished.MatchAll(root) {
		published, _, _ = fuzzytime.Extract(getAttr(meta, "content"))
		if published.HasFullDate() {
			break
		}
	}

	// Updated timestamp candidates.
	for _, meta := range dateSels.metaUpdated.MatchAll(root) {
		updated, _, _ = fuzzytime.Extract(getAttr(meta, "content"))
		if updated.HasFullDate() {
			break
		}
	}

	return published, updated
}