func readTitleNode(c *html.Node) string { titleNode := common.GetElementsByTagName(c, "h1")[0] aNode := common.GetElementsByTagName(titleNode, "a")[0] // 제목이 등록되지 않은 예외 상황이 있더라 if aNode.FirstChild == nil { return "" } title := aNode.FirstChild.Data return title }
func readArtistNode(c *html.Node) []string { artistParentNode := common.GetElementByClassName(c, "artist-list") artistNodes := common.GetElementsByTagName(artistParentNode, "a") artists := []string{} for _, c := range artistNodes { artist := c.FirstChild.Data artists = append(artists, artist) } return artists }
func readCoverNode(c *html.Node) []string { coverParentNode := common.GetElementByClassName(c, "dj-img-cont") coverNodes := common.GetElementsByTagName(coverParentNode, "img") covers := []string{} for _, c := range coverNodes { cover := "https:" + c.Attr[0].Val covers = append(covers, cover) } return covers }
func readIdNode(c *html.Node) string { titleNode := common.GetElementsByTagName(c, "h1")[0] url := titleNode.FirstChild.Attr[0].Val re := regexp.MustCompile(`/galleries/(.+).html`) m := re.FindStringSubmatch(url) if m == nil { return "" } return m[1] }
func readTagNode(c *html.Node) []string { tagParentNode := common.GetElementByClassName(c, "relatedtags") tagNodes := common.GetElementsByTagName(tagParentNode, "a") tags := []string{} for _, c := range tagNodes { tag := c.FirstChild.Data tag = sanitizeTag(tag) tags = append(tags, tag) } return tags }
func ReadEntryNode(n *html.Node) Metadata { // language + type // 특별한 구분자가 없어서 a 태그 전부 뽑은후 URL로 찾기 galleryType := "" language := "" series := []string{} descNode := common.GetElementByClassName(n, "dj-desc") aTags := common.GetElementsByTagName(descNode, "a") // /type/doujinshi-all-1.html typeRe := regexp.MustCompile(`/type/(.+)-(.+)-(\d+).html`) // /index-korean-1.html languageRe := regexp.MustCompile(`/index-(.+)-(\d+).html`) // /series/kantai%20collection-all-1.html seriesRe := regexp.MustCompile(`/series/(.+)-(.+)-(\d+).html`) for _, c := range aTags { if c.Attr[0].Key != "href" { continue } url := c.Attr[0].Val typeMatch := typeRe.FindStringSubmatch(url) if typeMatch != nil { galleryType = common.UrlDecode(typeMatch[1]) } languageMatch := languageRe.FindStringSubmatch(url) if languageMatch != nil { language = common.UrlDecode(languageMatch[1]) } seriesMatch := seriesRe.FindStringSubmatch(url) if seriesMatch != nil { series = append(series, common.UrlDecode(seriesMatch[1])) } } return Metadata{ ID: readIdNode(n), Title: readTitleNode(n), Covers: readCoverNode(n), Artists: readArtistNode(n), Groups: []string{}, Type: galleryType, Language: language, Series: series, Characters: []string{}, Tags: readTagNode(n), Date: readDateNode(n), } }