Exemple #1
0
// ReadList parses htmlsrc as an HTML document and returns one Metadata value
// for every element child of the "gallery-content" element.
//
// An empty, non-nil slice is returned when no "gallery-content" element is
// found. The function panics if html.Parse reports an error.
func ReadList(htmlsrc string) []Metadata {
	root, err := html.Parse(strings.NewReader(htmlsrc))
	if err != nil {
		panic(err)
	}

	result := []Metadata{}

	container := common.GetElementByClassName(root, "gallery-content")
	if container == nil {
		return result
	}

	for child := container.FirstChild; child != nil; child = child.NextSibling {
		// Nodes whose data consists solely of newlines carry no entry, and
		// anything that is not an element cannot be an entry either.
		onlyNewlines := strings.Trim(child.Data, "\n") == ""
		if onlyNewlines || child.Type != html.ElementNode {
			continue
		}
		result = append(result, ReadEntryNode(child))
	}
	return result
}
Exemple #2
0
// readArtistNode returns the Data of the first child node of every <a>
// element located under the "artist-list" element inside c (presumably the
// link text, i.e. the artist name).
//
// The result is always a non-nil slice. It is empty when c contains no
// "artist-list" element. Anchors without any child node are skipped, since
// they carry nothing to read.
func readArtistNode(c *html.Node) []string {
	artists := []string{}

	// The class lookup can yield nil (ReadList guards against the same
	// thing), so stop here instead of handing nil to the tag lookup.
	artistParentNode := common.GetElementByClassName(c, "artist-list")
	if artistParentNode == nil {
		return artists
	}

	for _, a := range common.GetElementsByTagName(artistParentNode, "a") {
		// An empty <a></a> has a nil FirstChild; dereferencing it would panic.
		if a.FirstChild == nil {
			continue
		}
		artists = append(artists, a.FirstChild.Data)
	}
	return artists
}
Exemple #3
0
// readCoverNode returns one URL per <img> element located under the
// "dj-img-cont" element inside c. Each URL is "https:" followed by the value
// of the image's first attribute.
//
// NOTE(review): this assumes the first attribute of every cover <img> is a
// protocol-relative source URL ("//host/path") — confirm against the markup.
//
// The result is always a non-nil slice. It is empty when c contains no
// "dj-img-cont" element. Images without any attribute are skipped.
func readCoverNode(c *html.Node) []string {
	covers := []string{}

	// The class lookup can yield nil (ReadList guards against the same
	// thing), so stop here instead of handing nil to the tag lookup.
	coverParentNode := common.GetElementByClassName(c, "dj-img-cont")
	if coverParentNode == nil {
		return covers
	}

	for _, img := range common.GetElementsByTagName(coverParentNode, "img") {
		// Indexing Attr[0] on an attribute-less <img> would panic.
		if len(img.Attr) == 0 {
			continue
		}
		covers = append(covers, "https:"+img.Attr[0].Val)
	}
	return covers
}
Exemple #4
0
// readTagNode returns the sanitized tag names found under the "relatedtags"
// element inside c. For every <a> element it takes the Data of the first
// child node and passes it through sanitizeTag.
//
// The result is always a non-nil slice. It is empty when c contains no
// "relatedtags" element. Anchors without any child node are skipped.
func readTagNode(c *html.Node) []string {
	tags := []string{}

	// The class lookup can yield nil (ReadList guards against the same
	// thing), so stop here instead of handing nil to the tag lookup.
	tagParentNode := common.GetElementByClassName(c, "relatedtags")
	if tagParentNode == nil {
		return tags
	}

	for _, a := range common.GetElementsByTagName(tagParentNode, "a") {
		// An empty <a></a> has a nil FirstChild; dereferencing it would panic.
		if a.FirstChild == nil {
			continue
		}
		tags = append(tags, sanitizeTag(a.FirstChild.Data))
	}
	return tags
}
Exemple #5
0
// Patterns used by ReadEntryNode to classify the links of an entry. They are
// compiled once at package initialisation rather than on every call.
//
// Example URLs, in declaration order:
//
//	/type/doujinshi-all-1.html
//	/index-korean-1.html
//	/series/kantai%20collection-all-1.html
var (
	entryTypeRe     = regexp.MustCompile(`/type/(.+)-(.+)-(\d+).html`)
	entryLanguageRe = regexp.MustCompile(`/index-(.+)-(\d+).html`)
	entrySeriesRe   = regexp.MustCompile(`/series/(.+)-(.+)-(\d+).html`)
)

// ReadEntryNode builds the Metadata for a single gallery entry node n.
//
// Type, Language and Series have no dedicated markup to select on, so every
// <a> element under the "dj-desc" element is collected and classified by the
// shape of its href. The first capture group of the matching pattern is
// URL-decoded and stored. When several links match the type or language
// pattern the last one wins; every series match is appended.
//
// The remaining fields come from the readIdNode, readTitleNode,
// readCoverNode, readArtistNode, readTagNode and readDateNode helpers.
// Groups and Characters are always empty slices.
//
// If n has no "dj-desc" element, Type and Language stay empty and Series is
// an empty slice.
func ReadEntryNode(n *html.Node) Metadata {
	galleryType := ""
	language := ""
	series := []string{}

	// The class lookup can yield nil (ReadList guards against the same
	// thing); in that case there are simply no links to classify.
	descNode := common.GetElementByClassName(n, "dj-desc")
	if descNode != nil {
		for _, a := range common.GetElementsByTagName(descNode, "a") {
			// Search all attributes for href instead of assuming it is the
			// first one: an <a> without attributes must not panic, and an <a>
			// whose href follows another attribute must not be ignored.
			for _, attr := range a.Attr {
				if attr.Key != "href" {
					continue
				}
				href := attr.Val
				if m := entryTypeRe.FindStringSubmatch(href); m != nil {
					galleryType = common.UrlDecode(m[1])
				}
				if m := entryLanguageRe.FindStringSubmatch(href); m != nil {
					language = common.UrlDecode(m[1])
				}
				if m := entrySeriesRe.FindStringSubmatch(href); m != nil {
					series = append(series, common.UrlDecode(m[1]))
				}
				// Only the first href of an element is considered.
				break
			}
		}
	}

	return Metadata{
		ID:         readIdNode(n),
		Title:      readTitleNode(n),
		Covers:     readCoverNode(n),
		Artists:    readArtistNode(n),
		Groups:     []string{},
		Type:       galleryType,
		Language:   language,
		Series:     series,
		Characters: []string{},
		Tags:       readTagNode(n),
		Date:       readDateNode(n),
	}
}
Exemple #6
0
// readDateNode returns the Data of the first child node of the "date"
// element inside c (presumably the date text as it appears in the markup).
//
// It returns the empty string when c contains no "date" element or when that
// element has no child node, instead of panicking on a nil dereference.
func readDateNode(c *html.Node) string {
	dateNode := common.GetElementByClassName(c, "date")
	if dateNode == nil || dateNode.FirstChild == nil {
		return ""
	}
	return dateNode.FirstChild.Data
}