Exemplo n.º 1
0
// parseBroadcastURLsNode collects one broadcastURL per programme link found
// below root.
//
// Every <h4> text is handed to timeForH4 to obtain the calendar date. Every
// <a> whose parent is a <dt>, searched below the <h4>'s parent, is matched
// against hourMinuteTitleRegExp: group 1 is used as hour, group 2 as minute
// and group 3 as title. Start hours below closeDownHour are booked on the
// following calendar day. The link's href is resolved against day.Source.
//
// Parsing stops at the first problem (timeForH4 failing, an <a> text that does
// not match, an href that url.Parse rejects); in that case a nil slice and the
// error are returned instead of panicking.
func (day *timeURL) parseBroadcastURLsNode(root *html.Node) (ret []*broadcastURL, err error) {
	const closeDownHour int = 5

	isH4 := func(n *html.Node) bool { return atom.H4 == n.DataAtom }
	isDtLink := func(n *html.Node) bool {
		return atom.A == n.DataAtom && nil != n.Parent && atom.Dt == n.Parent.DataAtom
	}

	for _, h4 := range scrape.FindAll(root, isH4) {
		year, month, dayOfMonth, dateErr := timeForH4(scrape.Text(h4), &day.Time)
		if nil != dateErr {
			return nil, dateErr
		}
		for _, a := range scrape.FindAll(h4.Parent, isDtLink) {
			m := hourMinuteTitleRegExp.FindStringSubmatch(scrape.Text(a))
			if nil == m {
				return nil, errors.New("Couldn't parse <a>")
			}
			// A nil *url.URL must never reach ResolveReference, so the parse
			// error is checked rather than discarded.
			ref, refErr := url.Parse(scrape.Attr(a, "href"))
			if nil != refErr {
				return nil, refErr
			}
			hour := r.MustParseInt(m[1])
			minute := r.MustParseInt(m[2])
			dayOffset := 0
			if hour < closeDownHour {
				// Early-morning slots are listed under the previous day's heading.
				dayOffset = 1
			}
			bcu := broadcastURL(r.BroadcastURL{
				TimeURL: r.TimeURL{
					// time.Date normalises a day number that overflows the month.
					Time:    time.Date(year, month, dayOfMonth+dayOffset, hour, minute, 0, 0, localLoc),
					Source:  *day.Source.ResolveReference(ref),
					Station: day.Station,
				},
				Title: strings.TrimSpace(m[3]),
			})
			ret = append(ret, &bcu)
		}
	}
	return ret, nil
}
Exemplo n.º 2
0
// timeForH4 extracts a calendar date from the text of an <h4> heading.
//
// h4 is matched against dayMonthRegExp; capture group 1 is used as the day of
// month and capture group 2 as the month number. The year is whatever
// yearForMonth returns for that month and now.
//
// If h4 does not match, a non-nil error is returned and year, mon and day are
// zero. Previously this case returned zero values with a nil error, which let
// callers build dates from year 0 / month 0 / day 0.
func timeForH4(h4 string, now *time.Time) (year int, mon time.Month, day int, err error) {
	m := dayMonthRegExp.FindStringSubmatch(h4)
	if nil == m {
		return 0, 0, 0, errors.New("couldn't parse day and month from '" + h4 + "'")
	}
	mon = time.Month(r.MustParseInt(m[2]))
	year = yearForMonth(mon, now)
	day = r.MustParseInt(m[1])
	return year, mon, day, nil
}
Exemplo n.º 3
0
// parseBroadcastsFromNode builds the broadcasts of one day from the
// <div class="time"> elements found below root.
//
// For each such element whose trimmed text is exactly 5 bytes long, bytes 0-1
// are parsed as hour and bytes 3-4 as minute; elements with any other text
// length are skipped. Title, Subject and Description are taken from every <a>
// that sits directly inside a <div class="descr"> below the time element's
// parent; that <a> is removed from its parent before the remaining content is
// rendered as description, so a later match overwrites an earlier one.
//
// Each broadcast's DtEnd points at the start time of the next accepted
// broadcast; the last one ends at 24:00 of the day.
//
// The result holds accepted broadcasts only. (It used to be pre-sized to the
// number of time elements, so a skipped element left a nil slot that was
// dereferenced when the neighbouring DtEnd was assigned.) An href rejected by
// url.Parse aborts parsing and is returned as err with a nil slice.
func (day *timeURL) parseBroadcastsFromNode(root *html.Node) (ret []*r.Broadcast, err error) {
	isTimeDiv := func(n *html.Node) bool {
		return atom.Div == n.DataAtom && "time" == scrape.Attr(n, "class")
	}
	isDescrLink := func(n *html.Node) bool {
		return atom.A == n.DataAtom && atom.Div == n.Parent.DataAtom && "descr" == scrape.Attr(n.Parent, "class")
	}

	nodes := scrape.FindAll(root, isTimeDiv)
	ret = make([]*r.Broadcast, 0, len(nodes))
	for _, tim := range nodes {
		hhmm := strings.TrimSpace(scrape.Text(tim))
		if 5 != len(hhmm) {
			continue
		}
		hour := r.MustParseInt(hhmm[0:2])
		minute := r.MustParseInt(hhmm[3:5])

		bc := r.Broadcast{
			BroadcastURL: r.BroadcastURL{
				TimeURL: r.TimeURL(*day),
			},
		}
		// some defaults
		bc.Language = &lang_de
		bc.Publisher = &publisher
		bc.Time = time.Date(day.Year(), day.Month(), day.Day(), hour, minute, 0, 0, day.TimeZone)
		// The previous broadcast ends where this one starts.
		if last := len(ret) - 1; last >= 0 {
			ret[last].DtEnd = &bc.Time
		}

		for _, tit := range scrape.FindAll(tim.Parent, isDescrLink) {
			bc.Title = strings.TrimSpace(scrape.Text(tit))
			if href := scrape.Attr(tit, "href"); "" != href {
				u, parseErr := url.Parse(href)
				if nil != parseErr {
					return nil, parseErr
				}
				bc.Subject = day.Source.ResolveReference(u)
			}

			// Drop the title link so it does not show up in the description.
			descNode := tit.Parent
			descNode.RemoveChild(tit)
			description := r.TextWithBrFromNodeSet([]*html.Node{descNode})
			bc.Description = &description
		}
		ret = append(ret, &bc)
	}
	if last := len(ret) - 1; last >= 0 {
		// Hour 24 is normalised by time.Date to 00:00 of the following day.
		midnight := time.Date(day.Year(), day.Month(), day.Day(), 24, 0, 0, 0, day.TimeZone)
		ret[last].DtEnd = &midnight
	}
	return ret, nil
}
Exemplo n.º 4
0
// parseBroadcastsFromNode builds the broadcasts of one day from the
// <div class="si_dayList_starttime"> elements found below root.
//
// Bytes 0-1 of the element text are parsed as hour and bytes 3-4 as minute.
// Elements whose text is shorter than 5 bytes (previously a slice-bounds
// panic), or whose hour exceeds 24 or minute exceeds 60, are skipped. The
// title is the trimmed text of the single <div class="si_dayList_description">
// below the start-time element's parent; the single <a> inside it, if any,
// provides Subject. Description starts out as a pointer to an empty string.
//
// Each broadcast's DtEnd points at the start time of the next accepted
// broadcast; the last one ends at 24:00 of the day.
//
// An error is returned, together with the broadcasts collected so far, when
// more than one description <div> or more than one <a> is found, or when
// url.Parse rejects an href.
func (day *timeURL) parseBroadcastsFromNode(root *html.Node) (ret []*r.Broadcast, err error) {
	isStartTime := func(n *html.Node) bool {
		return atom.Div == n.DataAtom && "si_dayList_starttime" == scrape.Attr(n, "class")
	}
	isDescription := func(n *html.Node) bool {
		return atom.Div == n.DataAtom && "si_dayList_description" == scrape.Attr(n, "class")
	}
	isLink := func(n *html.Node) bool { return atom.A == n.DataAtom }

	for _, at := range scrape.FindAll(root, isStartTime) {
		hhmm := scrape.Text(at)
		if len(hhmm) < 5 {
			// Too short to hold "hh:mm"; slicing below would be out of range.
			continue
		}
		hour := r.MustParseInt(hhmm[0:2])
		minute := r.MustParseInt(hhmm[3:5])
		if 24 < hour || 60 < minute {
			continue
		}

		bc := r.Broadcast{
			BroadcastURL: r.BroadcastURL{
				TimeURL: r.TimeURL(*day),
			},
		}
		// some defaults
		bc.Language = &lang_de
		bc.Publisher = &publisher
		emptyStr := ""
		bc.Description = &emptyStr
		bc.Time = time.Date(day.Year(), day.Month(), day.Day(), hour, minute, 0, 0, day.TimeZone)
		// The previous broadcast ends where this one starts.
		if last := len(ret) - 1; last >= 0 {
			ret[last].DtEnd = &bc.Time
		}

		for idx, div := range scrape.FindAll(at.Parent, isDescription) {
			if idx != 0 {
				return ret, errors.New("There was more than 1 <div class='si_dayList_description'>")
			}
			bc.Title = strings.TrimSpace(scrape.Text(div))
			for idx1, a := range scrape.FindAll(div, isLink) {
				if idx1 != 0 {
					return ret, errors.New("There was more than 1 <a>")
				}
				// A nil *url.URL must never reach ResolveReference.
				u, parseErr := url.Parse(scrape.Attr(a, "href"))
				if nil != parseErr {
					return ret, parseErr
				}
				bc.Subject = day.Source.ResolveReference(u)
			}
		}
		ret = append(ret, &bc)
	}
	if last := len(ret) - 1; last >= 0 {
		// Hour 24 is normalised by time.Date to 00:00 of the following day.
		midnight := time.Date(day.Year(), day.Month(), day.Day(), 24, 0, 0, 0, day.TimeZone)
		ret[last].DtEnd = &midnight
	}
	return ret, nil
}
Exemplo n.º 5
0
// parseBroadcastsFromNode builds the broadcasts of one day from the anchors
// found in <tr><td class="time"><a name="hhmm"> cells below root.
//
// The anchor's name attribute becomes the fragment of the broadcast's Source;
// its bytes 0-1 are parsed as hour and bytes 2-3 as minute. Anchors whose name
// is shorter than 4 bytes (this includes a missing name; short non-empty names
// previously caused a slice-bounds panic), or whose hour exceeds 24 or minute
// exceeds 60, are skipped. Publisher is the Deutschlandfunk URL unless the
// station identifier is "drk".
//
// Title, Subject and Description come from the single
// <td class="description"><h3> below the anchor's table row: every
// <a class="psradio"> is detached first, the single remaining <a> supplies
// title and subject, an empty title falls back to r.TextChildrenNoClimb(h3),
// and all <p> below the <h3>'s parent form the description.
//
// Each broadcast's DtEnd points at the start time of the next accepted
// broadcast; the last one ends at 24:00 of the day.
//
// An error is returned, together with the broadcasts collected so far, when
// more than one matching <h3> or more than one <a> inside it is found, or when
// url.Parse rejects an href.
func (day *timeURL) parseBroadcastsFromNode(root *html.Node) (ret []*r.Broadcast, err error) {
	isTimeAnchor := func(n *html.Node) bool {
		return atom.A == n.DataAtom &&
			atom.Td == n.Parent.DataAtom &&
			atom.Tr == n.Parent.Parent.DataAtom &&
			"time" == scrape.Attr(n.Parent, "class")
	}
	isDescriptionH3 := func(n *html.Node) bool {
		return atom.H3 == n.DataAtom &&
			atom.Td == n.Parent.DataAtom &&
			atom.Tr == n.Parent.Parent.DataAtom &&
			"description" == scrape.Attr(n.Parent, "class")
	}
	isRecordLink := func(n *html.Node) bool {
		return atom.A == n.DataAtom && "psradio" == scrape.Attr(n, "class")
	}
	isLink := func(n *html.Node) bool { return atom.A == n.DataAtom }
	isParagraph := func(n *html.Node) bool { return atom.P == n.DataAtom }

	for _, at := range scrape.FindAll(root, isTimeAnchor) {
		aID := scrape.Attr(at, "name")
		if len(aID) < 4 {
			// Empty or too short to hold "hhmm"; slicing below would be out of range.
			continue
		}
		hour := r.MustParseInt(aID[0:2])
		minute := r.MustParseInt(aID[2:4])
		if 24 < hour || 60 < minute {
			continue
		}

		bc := r.Broadcast{
			BroadcastURL: r.BroadcastURL{
				TimeURL: r.TimeURL(*day),
			},
		}
		// some defaults
		bc.Language = &lang_de
		{
			publisher := "http://www.deutschlandfunk.de/"
			if "drk" == day.Station.Identifier {
				publisher = "http://www.deutschlandradiokultur.de/"
			}
			bc.Publisher = &publisher
		}
		bc.Source.Fragment = aID
		bc.Time = time.Date(day.Year(), day.Month(), day.Day(), hour, minute, 0, 0, day.TimeZone)
		// The previous broadcast ends where this one starts.
		if last := len(ret) - 1; last >= 0 {
			ret[last].DtEnd = &bc.Time
		}

		for idx, h3 := range scrape.FindAll(at.Parent.Parent, isDescriptionH3) {
			if idx != 0 {
				return ret, errors.New("There was more than 1 <tr><td class='description'><h3>")
			}
			// purge 'aufnehmen' link. Detach via the node's own parent:
			// RemoveChild panics when the node is not a direct child, and the
			// match may be nested deeper inside the <h3>.
			for _, chi := range scrape.FindAll(h3, isRecordLink) {
				if nil != chi.Parent {
					chi.Parent.RemoveChild(chi)
				}
			}

			for linkIdx, h3A := range scrape.FindAll(h3, isLink) {
				if linkIdx != 0 {
					return ret, errors.New("There was more than 1 <tr><td class='description'><h3><a>")
				}
				bc.Title = scrape.Text(h3A)
				// A nil *url.URL must never reach ResolveReference.
				u, parseErr := url.Parse(scrape.Attr(h3A, "href"))
				if nil != parseErr {
					return ret, parseErr
				}
				bc.Subject = day.Source.ResolveReference(u)
			}
			bc.Title = strings.TrimSpace(bc.Title)
			if "" == bc.Title {
				bc.Title = r.TextChildrenNoClimb(h3)
			}
			description := r.TextWithBrFromNodeSet(scrape.FindAll(h3.Parent, isParagraph))
			bc.Description = &description
		}
		ret = append(ret, &bc)
	}
	if last := len(ret) - 1; last >= 0 {
		// Hour 24 is normalised by time.Date to 00:00 of the following day.
		midnight := time.Date(day.Year(), day.Month(), day.Day(), 24, 0, 0, 0, day.TimeZone)
		ret[last].DtEnd = &midnight
	}
	return ret, nil
}