Ejemplo n.º 1
0
// readTitleNode returns the Data of the first child of the first <a> element
// inside the first <h1> element found under c.
//
// It returns "" when no <h1> is found, when that <h1> contains no <a>, or
// when the anchor has no child node.
func readTitleNode(c *html.Node) string {
	headings := common.GetElementsByTagName(c, "h1")
	if len(headings) == 0 {
		return ""
	}

	anchors := common.GetElementsByTagName(headings[0], "a")
	if len(anchors) == 0 {
		return ""
	}

	// Some entries turn out to have no title registered, which leaves the
	// anchor without any child node.
	first := anchors[0].FirstChild
	if first == nil {
		return ""
	}
	return first.Data
}
Ejemplo n.º 2
0
// readArtistNode collects one string per <a> element found under the element
// with class "artist-list" inside c. Each string is the Data of the anchor's
// first child node.
//
// Anchors without any child node are skipped instead of causing a nil
// dereference. The returned slice is never nil.
func readArtistNode(c *html.Node) []string {
	artistParentNode := common.GetElementByClassName(c, "artist-list")
	artistNodes := common.GetElementsByTagName(artistParentNode, "a")

	artists := make([]string, 0, len(artistNodes))
	for _, anchor := range artistNodes {
		// An empty anchor carries no name to report.
		if anchor.FirstChild == nil {
			continue
		}
		artists = append(artists, anchor.FirstChild.Data)
	}
	return artists
}
Ejemplo n.º 3
0
// readCoverNode builds one URL per <img> element found under the element with
// class "dj-img-cont" inside c. Each URL is "https:" followed by the value of
// the image's first attribute.
//
// Images without any attribute are skipped instead of causing an
// out-of-range panic. The returned slice is never nil.
func readCoverNode(c *html.Node) []string {
	coverParentNode := common.GetElementByClassName(c, "dj-img-cont")
	coverNodes := common.GetElementsByTagName(coverParentNode, "img")

	covers := make([]string, 0, len(coverNodes))
	for _, img := range coverNodes {
		if len(img.Attr) == 0 {
			continue
		}
		// NOTE(review): presumably the first attribute is a protocol-relative
		// src ("//host/path"); the attribute key is not checked here.
		covers = append(covers, "https:"+img.Attr[0].Val)
	}
	return covers
}
Ejemplo n.º 4
0
// galleryIDPattern captures the gallery identifier from a URL containing
// "/galleries/<id>.html". It is compiled once at package scope rather than on
// every call.
var galleryIDPattern = regexp.MustCompile(`/galleries/(.+).html`)

// readIdNode extracts the gallery identifier from the first <h1> element
// found under c. It reads the value of the first attribute of that heading's
// first child and matches it against galleryIDPattern.
//
// It returns "" when there is no <h1>, when the heading has no child, when
// that child has no attributes, or when the value does not match the pattern.
func readIdNode(c *html.Node) string {
	headings := common.GetElementsByTagName(c, "h1")
	if len(headings) == 0 {
		return ""
	}

	link := headings[0].FirstChild
	if link == nil || len(link.Attr) == 0 {
		return ""
	}

	// NOTE(review): presumably the first attribute is the href; the key is
	// not checked here.
	m := galleryIDPattern.FindStringSubmatch(link.Attr[0].Val)
	if m == nil {
		return ""
	}
	return m[1]
}
Ejemplo n.º 5
0
// readTagNode collects one tag per <a> element found under the element with
// class "relatedtags" inside c. Each tag is the Data of the anchor's first
// child node, passed through sanitizeTag.
//
// Anchors without any child node are skipped instead of causing a nil
// dereference. The returned slice is never nil.
func readTagNode(c *html.Node) []string {
	tagParentNode := common.GetElementByClassName(c, "relatedtags")
	tagNodes := common.GetElementsByTagName(tagParentNode, "a")

	tags := make([]string, 0, len(tagNodes))
	for _, anchor := range tagNodes {
		// An empty anchor carries no tag text to sanitize.
		if anchor.FirstChild == nil {
			continue
		}
		tags = append(tags, sanitizeTag(anchor.FirstChild.Data))
	}
	return tags
}
Ejemplo n.º 6
0
// Patterns used by ReadEntryNode to classify the links of an entry
// description. They are compiled once at package scope rather than on every
// call.
var (
	// entryTypePattern matches e.g. /type/doujinshi-all-1.html
	entryTypePattern = regexp.MustCompile(`/type/(.+)-(.+)-(\d+).html`)
	// entryLanguagePattern matches e.g. /index-korean-1.html
	entryLanguagePattern = regexp.MustCompile(`/index-(.+)-(\d+).html`)
	// entrySeriesPattern matches e.g. /series/kantai%20collection-all-1.html
	entrySeriesPattern = regexp.MustCompile(`/series/(.+)-(.+)-(\d+).html`)
)

// ReadEntryNode builds a Metadata value from the entry node n.
//
// Type, Language and Series are derived from the <a> elements under the
// element with class "dj-desc". The markup offers no dedicated marker for
// these fields, so every anchor is collected and classified by its URL. Only
// anchors whose first attribute is "href" are considered, and anchors without
// any attribute are skipped. When several anchors match the type or language
// pattern, the last match wins; every series match is appended.
//
// ID, Title, Covers, Artists, Tags and Date come from the corresponding
// read*Node helpers. Groups and Characters are always empty slices.
func ReadEntryNode(n *html.Node) Metadata {
	galleryType := ""
	language := ""
	series := []string{}

	descNode := common.GetElementByClassName(n, "dj-desc")
	aTags := common.GetElementsByTagName(descNode, "a")

	for _, link := range aTags {
		// Guard against attribute-less anchors before indexing Attr[0].
		if len(link.Attr) == 0 || link.Attr[0].Key != "href" {
			continue
		}
		href := link.Attr[0].Val

		if m := entryTypePattern.FindStringSubmatch(href); m != nil {
			galleryType = common.UrlDecode(m[1])
		}
		if m := entryLanguagePattern.FindStringSubmatch(href); m != nil {
			language = common.UrlDecode(m[1])
		}
		if m := entrySeriesPattern.FindStringSubmatch(href); m != nil {
			series = append(series, common.UrlDecode(m[1]))
		}
	}

	return Metadata{
		ID:         readIdNode(n),
		Title:      readTitleNode(n),
		Covers:     readCoverNode(n),
		Artists:    readArtistNode(n),
		Groups:     []string{},
		Type:       galleryType,
		Language:   language,
		Series:     series,
		Characters: []string{},
		Tags:       readTagNode(n),
		Date:       readDateNode(n),
	}
}