Golang DateTime.Empty примеры использования

Язык программирования: Golang
Пространство имен/Пакет: github.com/bcampbell/fuzzytime
Класс/Тип: DateTime
Метод/Функция: Empty
Примеров на hotexamples.com: 1
Golang DateTime.Empty - 1 пример найден. Это лучшие примеры Golang кода для github.com/bcampbell/fuzzytime.DateTime.Empty, полученные из open source проектов. Вы можете ставить оценку каждому примеру, чтобы помочь нам улучшить качество примеров.
Основные методы
Показать Скрыть
Date(1)
Empty(1)
HasFullDate(1)
Основные методы
Date (1)
Empty (1)
HasFullDate (1)
Пример #1
Показать файл
Файл: dates.go Проект: bcampbell/arts
// grabDates tries to work out the published and updated dates of an article.
//
// It collects date hints from the URL, from <meta> tags and from every element
// matched by dateSels.tags. Each matching element becomes a pair of scored
// candidates (one for "published", one for "updated"); a series of heuristic
// tests add or remove points, and the top-scoring candidates are picked.
//
// Parameters:
//   - root: root of the parsed document.
//   - artURL: the article URL, checked for an embedded date.
//   - contentNodes: nodes holding the article content.
//   - headlineNode: the headline node; may be nil.
//   - scriptNodes: passed through to checkEvilSpecialCaseHacks.
//   - cruftBlocks: blocks (comments, social links, whatever) in which any
//     date found is penalised.
//
// Returns (published, updated). If both <meta> dates are full dates they are
// returned immediately without any further checks. Otherwise:
//   - published is the best published candidate, falling back to the <meta>
//     value, then the URL date, then the special-case hack value.
//   - updated is the <meta> value if it has a full date, else the best
//     updated candidate (borrowing the published date if it is time-only).
//
// Either result may be empty if nothing suitable was found.
//
// TODO: (eg newsquest sites)
// <span data-format="article-display" data-show-date="always" data-show-time="today-only" data-timestamp="1461211200" itemprop="datePublished" class="timestamp formatTimeStamp" full-date="20.04.2016">20 mins ago</span>
func grabDates(root *html.Node, artURL *url.URL,
	contentNodes []*html.Node, headlineNode *html.Node, scriptNodes []*html.Node,
	cruftBlocks []*html.Node) (fuzzytime.DateTime, fuzzytime.DateTime) {
	dbug := Debug.DatesLogger
	publishedCandidates := make(dateCandidateList, 0, 32)
	updatedCandidates := make(dateCandidateList, 0, 32)

	// there might be an obvious date in the URL
	urlDate := dateFromURL(artURL)

	// look for timestamps in <meta> tags
	metaPublished, metaUpdated := datesFromMeta(root)

	// if the metadata gives us everything, trust it and skip the heuristics
	if metaPublished.HasFullDate() && metaUpdated.HasFullDate() {
		return metaPublished, metaUpdated
	}

	evilPublished := checkEvilSpecialCaseHacks(artURL, scriptNodes)

	// get a list of elements between headline and content
	// (left nil if there's no headline/content, or on error)
	var betwixt []*html.Node
	if headlineNode != nil && len(contentNodes) > 0 {
		if found, err := interveningElements(headlineNode, contentNodes[0]); err == nil {
			betwixt = found
		}
	}
	// score awarded for sitting between headline and content. It decays with
	// each hit, so earlier elements are preferred.
	betwixtValue := 1.0

	for _, node := range dateSels.tags.MatchAll(root) {

		var txt string
		// a couple of cases where we want text from attrs instead
		switch node.DataAtom {
		case atom.Time:
			txt = getAttr(node, "datetime")
			if txt == "" {
				txt = getTextContent(node)
			}
		case atom.Abbr:
			txt = getAttr(node, "title")
			if txt == "" {
				txt = getTextContent(node)
			}
		default:
			// check for obvious machine-readable timestamps
			if raw := getAttr(node, "data-timestamp"); raw != "" {
				if stamp, err := strconv.ParseInt(raw, 10, 64); err == nil {
					tm := time.Unix(stamp, 0).UTC()
					if tm.Year() >= 10000 {
						// Implausibly far in the future for a count of
						// seconds - probably a javascript-style millisecond
						// timestamp, so try again as milliseconds.
						tm = time.Unix(stamp/1000, 0).UTC()
					}
					if tm.Year() < 10000 {
						// OK, looks sensible(ish). We'll use it.
						// Cheesy hack - pass it on as text for re-parsing!
						txt = tm.Format(time.RFC3339)
					}
				}
			}
			if txt == "" {
				txt = getTextContent(node)
			}
		}

		if len(txt) < 6 || len(txt) > 150 {
			continue // too short or too long to be worth parsing
		}

		// got some date/time info?
		// The error is deliberately dropped: an empty dt is skipped below
		// whatever the cause.
		// NOTE(review): assumes Extract leaves dt empty on error - confirm.
		dt, spans, _ := fuzzytime.WesternContext.Extract(txt)
		if dt.Empty() {
			continue // no data, (or there was an error)
		}

		// Same node/text/date for both, but they are scored independently.
		publishedC := newDateCandidate(node, txt, dt)
		updatedC := newDateCandidate(node, txt, dt)

		if node.DataAtom == atom.P {
			// for paragraphs, calculate proportion of text which is date/time
			mcnt := 0
			for _, span := range spans {
				mcnt += span.End - span.Begin
			}
			dateProportion := float64(mcnt) / float64(len(txt))
			if dateProportion < 0.5 {
				continue // too much text, not enough date.
			}
		}

		// prefer datetimes over just dates (or times)
		if dt.HasYear() && dt.HasMonth() && dt.HasDay() {
			if dt.HasHour() && dt.HasMinute() {
				publishedC.addPoints(0.75, "datetime")
				updatedC.addPoints(0.75, "datetime")
			}
		}
		if dt.Date.Empty() {
			publishedC.addPoints(-0.5, "no date")
			updatedC.addPoints(-0.5, "no date")
		}

		// TEST: is machine readable?
		if node.DataAtom == atom.Time {
			publishedC.addPoints(1, "<time>")
			updatedC.addPoints(1, "<time>")
		}

		// TEST: indicative text ("posted:" etc...)
		if datePats.publishedIndicativeText.MatchString(txt) {
			publishedC.addPoints(1, "indicative text")
		}
		// TEST: indicative text for updated
		if datePats.updatedIndicativeText.MatchString(txt) {
			updatedC.addPoints(1, "indicative text")
		}

		// TEST: hAtom date markup
		if dateSels.hatomPublished.Match(node) {
			publishedC.addPoints(2, "hentry .published")
		}
		if dateSels.hatomUpdated.Match(node) {
			// credit the updated candidate (not published) for .updated markup
			updatedC.addPoints(2, "hentry .updated")
		}

		// TEST: likely class or id?
		if datePats.genericClasses.MatchString(getAttr(node, "class")) {
			updatedC.addPoints(1, "likely class")
			publishedC.addPoints(1, "likely class")
		}
		if datePats.genericClasses.MatchString(getAttr(node, "id")) {
			updatedC.addPoints(1, "likely id")
			publishedC.addPoints(1, "likely id")
		}
		// TEST: likely class or id for published?
		if datePats.publishedClasses.MatchString(getAttr(node, "class")) {
			publishedC.addPoints(1, "likely class for published")
		}
		if datePats.publishedClasses.MatchString(getAttr(node, "id")) {
			publishedC.addPoints(1, "likely id for published")
		}
		// TEST: likely class or id for updated?
		if datePats.updatedClasses.MatchString(getAttr(node, "class")) {
			updatedC.addPoints(1, "likely class for updated")
		}
		if datePats.updatedClasses.MatchString(getAttr(node, "id")) {
			updatedC.addPoints(1, "likely id for updated")
		}

		// TEST: RDFa property="dc:issued"  (issued, updated, created etc)
		if dateSels.rdfaPublished.Match(node) {
			publishedC.addPoints(1, "likely rdfa markup for published")
		}
		if dateSels.rdfaUpdated.Match(node) {
			updatedC.addPoints(1, "likely rdfa markup for updated")
		}

		// TEST: within article content?
		// (points are awarded once per content node that matches)
		for _, contentNode := range contentNodes {
			if contains(contentNode, node) {
				publishedC.addPoints(1, "contained within content")
				updatedC.addPoints(1, "contained within content")
			}
		}
		// TEST: share a parent with content?
		for _, contentNode := range contentNodes {
			if contains(contentNode.Parent, node) {
				publishedC.addPoints(1, "near content")
				updatedC.addPoints(1, "near content")
			}
		}

		// TEST: within a cruft block? (comment, social link, whatever)
		for _, cruftBlock := range cruftBlocks {
			if contains(cruftBlock, node) {
				desc := fmt.Sprintf("inside cruft (%s)", describeNode(cruftBlock))
				publishedC.addPoints(-3, desc)
				updatedC.addPoints(-3, desc)
			}
		}

		// TODO: TEST: agrees with <meta> tag values?

		// TEST: between headline and content?
		for _, e := range betwixt {
			if e == node {
				// first one preferred
				updatedC.addPoints(betwixtValue, "between headline and content")
				publishedC.addPoints(betwixtValue, "between headline and content")
				betwixtValue *= 0.9
				break
			}
		}

		// TEST: matches date info in URL?
		// (if not, fill in any missing fields using the URL date!)
		if !urlDate.Empty() {
			if urlDate.Conflicts(&dt.Date) {
				updatedC.addPoints(-1, "clash with date in url")
				publishedC.addPoints(-1, "clash with date in url")
			} else {
				dt.Date.Merge(&urlDate)
				updatedC.dt = dt
				publishedC.dt = dt
			}
		}

		// TODO: TEST - proximity to top or bottom of article content
		// TODO: check for value-title pattern?

		// only candidates with a positive score are kept
		if publishedC.total() > 0 {
			publishedCandidates = append(publishedCandidates, publishedC)
		}

		if updatedC.total() > 0 {
			updatedCandidates = append(updatedCandidates, updatedC)
		}

	}

	dbug.Printf("date from url: %s\n", urlDate.String())
	dbug.Printf("meta updated: %s\n", metaUpdated.String())
	dbug.Printf("meta published: %s\n", metaPublished.String())
	if !evilPublished.Empty() {
		dbug.Printf("evilspecialcase published: '%s'\n", evilPublished.String())
	}

	publishedCandidates.Sort()
	dbug.Printf("PUBLISHED: %d candidates\n", len(publishedCandidates))
	for _, c := range publishedCandidates {
		c.dump(dbug)
	}

	updatedCandidates.Sort()
	dbug.Printf("UPDATED: %d candidates\n", len(updatedCandidates))
	for _, c := range updatedCandidates {
		c.dump(dbug)
	}

	var published, updated fuzzytime.DateTime

	// pick best candidate for published
	if best, err := publishedCandidates.TopDate(); err == nil {
		published = best.dt
	} else {
		dbug.Printf("published: Didn't pick any (%s)", err)
	}

	// no luck with the candidates? fall back to meta, then url, then hacks
	if published.Empty() {
		if !metaPublished.Empty() {
			published = metaPublished
		} else if !urlDate.Empty() {
			published = fuzzytime.DateTime{Date: urlDate}
		} else if !evilPublished.Empty() {
			published = evilPublished
		}
	}

	// updated: use meta data if present
	if metaUpdated.HasFullDate() {
		updated = metaUpdated
	} else if best, err := updatedCandidates.TopDate(); err == nil {
		updated = best.dt
		// if time only, use date from published
		if updated.Date.Empty() && !updated.Time.Empty() {
			updated.Date = published.Date
		}
	} else {
		dbug.Printf("updated: Didn't pick any (%s)", err)
	}

	return published, updated
}