func convertTagsToCard(tags map[string]string, webUrl string) (wildcard.Wildcard, error) { resolveAliases(tags) ogType, ok := tags["og:type"] if !ok { ogType = "website" } var card wildcard.Wildcard url, ok := tags["og:url"] if !ok { url = webUrl } switch ogType { case "article": card = wildcard.NewArticleCard(webUrl, url) case "video": fallthrough case "video.episode": fallthrough case "video.movie": fallthrough case "video.other": card = wildcard.NewVideoCard(webUrl) default: card = wildcard.NewLinkCard(webUrl, url) } err := recursivelyDecode(tags, card) if err != nil { return nil, err } return card, nil }
func (s *Scraper) ScrapeTags(url string) (interface{}, error) { permitted, err := s.checkRobotsTxt(url) if err != nil { return nil, err } if !permitted { return nil, errors.New(fmt.Sprintf("Not permitted to fetch %s", url)) } req, err := s.buildRequest(url) resp, err := s.client.Do(req) if err != nil { return nil, err } defer resp.Body.Close() if resp.StatusCode != 200 { errMsg := fmt.Sprintf("Could not fetch %s (%d)", url, resp.StatusCode) return nil, errors.New(errMsg) } contentType := resp.Header.Get("Content-Type") if contentType == "" || strings.Contains(contentType, "text/html") { return s.ParseTags(resp.Body, url) } // We can't really trust the Content-Type header, so we take // a look at what actually gets returned. contentStart, err := ioutil.ReadAll(io.LimitReader(resp.Body, 512)) if err != nil { contentType = http.DetectContentType(contentStart) } switch { case strings.HasPrefix(contentType, "image"): card := wildcard.NewImageCard(url, url) card.Media.ImageContentType = contentType return card, nil case strings.HasPrefix(contentType, "video"): card := wildcard.NewVideoCard(url) card.Media.StreamUrl = url card.Media.StreamContentType = contentType return card, nil default: card := wildcard.NewLinkCard(url, url) return card, nil } }