Пример #1
0
// convertTagsToCard builds a wildcard card from a set of scraped meta tags.
//
// resolveAliases is applied to tags first; it receives the map itself, so it
// may modify it. The card kind is then chosen from the "og:type" tag, with
// "website" assumed when the tag is absent:
//
//   - "article" produces an article card;
//   - "video", "video.episode", "video.movie" and "video.other" produce a
//     video card;
//   - anything else produces a link card.
//
// Article and link cards receive webUrl plus the value of the "og:url" tag,
// or webUrl again when that tag is absent. Finally the tags are decoded into
// the card via recursivelyDecode; if that fails, the error is returned
// together with a nil card.
func convertTagsToCard(tags map[string]string, webUrl string) (wildcard.Wildcard, error) {
	resolveAliases(tags)

	canonical, found := tags["og:url"]
	if !found {
		canonical = webUrl
	}

	kind, found := tags["og:type"]
	if !found {
		kind = "website"
	}

	var card wildcard.Wildcard
	switch kind {
	case "video", "video.episode", "video.movie", "video.other":
		card = wildcard.NewVideoCard(webUrl)
	case "article":
		card = wildcard.NewArticleCard(webUrl, canonical)
	default:
		card = wildcard.NewLinkCard(webUrl, canonical)
	}

	if err := recursivelyDecode(tags, card); err != nil {
		return nil, err
	}
	return card, nil
}
Пример #2
0
// ScrapeTags fetches url and turns the response into a card.
//
// The fetch is attempted only if s.checkRobotsTxt reports it as permitted.
// A request is built with s.buildRequest and sent through s.client; any
// status other than 200 is reported as an error.
//
// If the Content-Type header is empty or mentions "text/html", the body is
// handed to s.ParseTags and its result is returned as is. Otherwise the
// first 512 bytes of the body are sniffed with http.DetectContentType and
// the detected type replaces the header value. If the body cannot be read,
// the header value is kept. A type starting with "image" yields an image
// card, one starting with "video" yields a video card whose stream URL is
// url, and anything else yields a plain link card.
//
// The returned error is non-nil when the robots check fails or denies
// access, the request cannot be built or sent, or the status is not 200.
func (s *Scraper) ScrapeTags(url string) (interface{}, error) {
	permitted, err := s.checkRobotsTxt(url)
	if err != nil {
		return nil, err
	}
	if !permitted {
		return nil, errors.New(fmt.Sprintf("Not permitted to fetch %s", url))
	}

	req, err := s.buildRequest(url)
	if err != nil {
		// Previously this error was overwritten by the Do call below, so a
		// failed build would pass a possibly nil request to the client.
		return nil, err
	}
	resp, err := s.client.Do(req)
	if err != nil {
		return nil, err
	}
	defer resp.Body.Close()

	if resp.StatusCode != http.StatusOK {
		errMsg := fmt.Sprintf("Could not fetch %s (%d)", url, resp.StatusCode)
		return nil, errors.New(errMsg)
	}

	contentType := resp.Header.Get("Content-Type")
	if contentType == "" || strings.Contains(contentType, "text/html") {
		return s.ParseTags(resp.Body, url)
	}

	// We can't really trust the Content-Type header, so we take
	// a look at what actually gets returned. DetectContentType considers
	// at most 512 bytes, hence the limit. Sniffing only makes sense when
	// the read succeeded; on a read error the header value is kept.
	contentStart, err := ioutil.ReadAll(io.LimitReader(resp.Body, 512))
	if err == nil {
		contentType = http.DetectContentType(contentStart)
	}

	switch {
	case strings.HasPrefix(contentType, "image"):
		card := wildcard.NewImageCard(url, url)
		card.Media.ImageContentType = contentType
		return card, nil
	case strings.HasPrefix(contentType, "video"):
		card := wildcard.NewVideoCard(url)
		card.Media.StreamUrl = url
		card.Media.StreamContentType = contentType
		return card, nil
	default:
		card := wildcard.NewLinkCard(url, url)
		return card, nil
	}
}