// EVIL SPECIALCASE HACK ALERT func checkEvilSpecialCaseHacks(artURL *url.URL, scriptNodes []*html.Node) fuzzytime.DateTime { published := fuzzytime.DateTime{} if artURL.Host == "www.buzzfeed.com" { dbug := Debug.DatesLogger // get it from javascript // var buzzDetails = {..., published: "2015-02-17 17:57:12", ...}; dbug.Printf("specialcase buzzfeeed check") bfPat := regexp.MustCompile(`published:\s+"(.*?)"`) for _, el := range scriptNodes { txt := getTextContent(el) dbug.Printf("----\n%s\n----", txt) m := bfPat.FindStringSubmatch(txt) if m != nil { published, _, _ = fuzzytime.Extract(m[1]) break } } } return published }
// Helper method to parse date in unknown format func getTimeFromHeader(date string) (time.Time, error) { fuz, _, err := fuzzytime.Extract(date) if err != nil { return time.Time{}, err } if fuz.Time.HasSecond() { return time.Parse(ISO_FORMAT, fuz.ISOFormat()) } else { return time.Parse(ISO_FORMAT_NO_SECONDS, fuz.ISOFormat()) } }
// datesFromMeta checks for timestamps in <meta> tags. // returns published, updated func datesFromMeta(root *html.Node) (fuzzytime.DateTime, fuzzytime.DateTime) { metaUpdated := fuzzytime.DateTime{} metaPublished := fuzzytime.DateTime{} for _, node := range dateSels.metaPublished.MatchAll(root) { content := getAttr(node, "content") metaPublished, _, _ = fuzzytime.Extract(content) if metaPublished.HasFullDate() { break } } for _, node := range dateSels.metaUpdated.MatchAll(root) { content := getAttr(node, "content") metaUpdated, _, _ = fuzzytime.Extract(content) if metaUpdated.HasFullDate() { break } } return metaPublished, metaUpdated }