Example #1
0
func getQ(tknzer html.Tokenizer, ch chan string) {
	tknzer.Next()
	tknzer.Next()
	tknzer.Next()
	tknzer.Next()
	ch <- string(tknzer.Text())
}
Example #2
0
func getText(z *html.Tokenizer) string {
	tt := z.Next()
	switch tt {
	case html.ErrorToken:
		panic(z.Err())
	case html.TextToken:
		return string(z.Text())
	}
	return ""
}
func (item *AnimeConventionItem) readCountry(t *html.Tokenizer, hasmore bool) {
	if hasmore {
		_, val, _ := t.TagAttr()
		item.country = string(val)
		return

	}
	t.Next()
	item.country = string(t.Text())
}
func (item *AnimeConventionItem) readNameAndLink(t *html.Tokenizer) {
	if label := t.Next(); label == html.StartTagToken {
		_, hasmore := t.TagName()
		if hasmore {
			if key, val, _ := t.TagAttr(); strings.EqualFold(string(key), "href") {
				item.siteURL = string(val)
			}
		}
	}
	if label := t.Next(); label == html.TextToken {
		item.name = string(t.Text())
	}
}
func (item *AnimeConventionItem) Parse(t *html.Tokenizer) {
	for {
		label := t.Next()
		switch label {
		case html.ErrorToken:
			fmt.Errorf("%v\n", t.Err())
			return
		case html.TextToken:
			switch string(t.Text()) {
			case "Advance Rates:":
				//fmt.Println("rate")
				item.readadvanceRate(t)
			case "At-Door Rates:":
				item.readatDoorRate(t)
			}
		case html.StartTagToken, html.EndTagToken, html.SelfClosingTagToken:
			tag, hasmore := t.TagName()
			if strings.EqualFold(string(tag), "big") {
				item.readResgiterNowurl(t)
			} else if hasmore {
				key, val, hasmore := t.TagAttr()
				if strings.EqualFold(string(key), "itemprop") {
					//fmt.Println(string(val))
					switch string(val) {
					case "description":
						item.readDescription(t)
					case "latitude":
						item.readLatitude(t)
					case "longitude":
						item.readLongitude(t)
					case "startDate":
						item.readStartDate(t)
					case "endDate":
						item.readEndDate(t)
					case "location":
						item.readLocation(t)
					case "addressLocality":
						item.readCity(t)
					case "addressRegion":
						item.readState(t)
					case "addressCountry":
						item.readCountry(t, hasmore)
					case "name":
						item.readNameAndLink(t)
					}
				}
			}
		}
	}
}
func (item *AnimeConventionItem) readLocation(t *html.Tokenizer) {
	for {
		if label := t.Next(); label == html.StartTagToken {
			_, hasmore := t.TagName()
			if hasmore {
				if _, val, _ := t.TagAttr(); strings.EqualFold(string(val), "name") {
					break
				}
			}
		}
	}
	if label := t.Next(); label == html.TextToken {
		item.location = string(t.Text())
	}
}
func (item *AnimeConventionItem) readRates(t *html.Tokenizer) string {
	rates := ""
	for {
		label := t.Next()
		if label == html.EndTagToken {
			val, _ := t.TagName()
			if strings.EqualFold(string(val), "p") {
				break
			}
		}
		if label == html.TextToken {
			rates = strings.Join([]string{rates, string(t.Text())}, "\n")
		}
	}
	return strings.TrimSpace(rates)
}
Example #8
0
func buildTokenArray(tokenizer *xhtml.Tokenizer) []tagInfo {
	tokens := []tagInfo{}
	for tt := tokenizer.Next(); tt != xhtml.ErrorToken; tt = tokenizer.Next() {
		switch tt {
		case xhtml.TextToken:
			txt := string(tokenizer.Text())
			if len(tokens) == 0 {
				info := tagInfo{
					raw: txt,
				}
				tokens = append(tokens, info)
			}
			tn, _ := tokenizer.TagName()
			key, val, _ := tokenizer.TagAttr()
			info := tagInfo{
				tag: string(tn),
				key: string(key),
				val: string(val),
				txt: txt,
			}
			tokens = append(tokens, info)
		case xhtml.StartTagToken:
			tn, _ := tokenizer.TagName()
			key, val, _ := tokenizer.TagAttr()
			info := tagInfo{
				tag: string(tn),
				key: string(key),
				val: string(val),
			}
			tokens = append(tokens, info)
		case xhtml.SelfClosingTagToken, xhtml.EndTagToken:
			tn, _ := tokenizer.TagName()
			key, val, _ := tokenizer.TagAttr()
			info := tagInfo{
				tag:        string(tn),
				key:        string(key),
				val:        string(val),
				closingTag: true,
			}
			tokens = append(tokens, info)
		}
	}
	return tokens
}
func parseMetaJson(z *html.Tokenizer, c *MemoryContent) error {
	tt := z.Next()
	if tt != html.TextToken {
		return fmt.Errorf("expected text node for meta json, but found %v, (%s)", tt.String(), z.Raw())
	}

	bytes := z.Text()
	err := json.Unmarshal(bytes, &c.meta)
	if err != nil {
		return fmt.Errorf("error while parsing json from meta json element: %v", err.Error())
	}

	tt = z.Next()
	tag, _ := z.TagName()
	if tt != html.EndTagToken || string(tag) != "script" {
		return fmt.Errorf("Tag not properly ended. Expected </script>, but found %s", z.Raw())
	}

	return nil
}
Example #10
0
func parseToken(tokenizer *html.Tokenizer, htmlDoc *htmlDocument, parent *tagElement) (bool, bool, string) {
	tokenType := tokenizer.Next()
	switch tokenType {
	case html.ErrorToken:
		return true, false, ""
	case html.TextToken:
		text := string(tokenizer.Text())
		if strings.TrimSpace(text) == "" {
			break
		}
		textElement := &textElement{text: text}
		appendElement(htmlDoc, parent, textElement)
	case html.StartTagToken:
		tagElement := &tagElement{tagName: getTagName(tokenizer), startTagRaw: string(tokenizer.Raw())}
		appendElement(htmlDoc, parent, tagElement)
		for {
			errorToken, parentEnded, unsetEndTag := parseToken(tokenizer, htmlDoc, tagElement)
			if errorToken {
				return true, false, ""
			}
			if parentEnded {
				if unsetEndTag != "" {
					return false, false, unsetEndTag
				}
				break
			}
			if unsetEndTag != "" {
				return false, false, setEndTagRaw(tokenizer, tagElement, unsetEndTag)
			}
		}
	case html.EndTagToken:
		return false, true, setEndTagRaw(tokenizer, parent, getTagName(tokenizer))
	case html.DoctypeToken, html.SelfClosingTagToken, html.CommentToken:
		tagElement := &tagElement{tagName: getTagName(tokenizer), startTagRaw: string(tokenizer.Raw())}
		appendElement(htmlDoc, parent, tagElement)
	}
	return false, false, ""
}
Example #11
0
func getTextR(z *html.Tokenizer) string {
	r := ""
	depth := 1
	for {
		tt := z.Next()
		switch tt {
		case html.ErrorToken:
			panic(z.Err())
		case html.TextToken:
			r += string(z.Text())
		case html.StartTagToken:
			tn, _ := z.TagName()
			tns := strings.ToLower(string(tn))
			switch tns {
			case "div":
				r += "\r"
				depth++
			case "span":
				r += "'"
				depth++
			}
		case html.EndTagToken:
			tn, _ := z.TagName()
			tns := strings.ToLower(string(tn))
			switch tns {
			case "div":
				depth--
			case "span":
				r += "'"
				depth--
			}
		}
		if depth == 0 {
			return r
		}
	}
}
Example #12
0
func extractTitleFromTree(z *html.Tokenizer) string {
	depth := 0
	for {
		tt := z.Next()
		switch tt {
		case html.ErrorToken:
			return ""
		case html.TextToken:
			if depth > 0 {
				title := strings.TrimSpace(string(z.Text()))
				lower := strings.ToLower(title)
				if strings.HasPrefix(lower, "imgur") {
					return ""
				}
				return title
			}
		case html.StartTagToken:
			tn, _ := z.TagName()
			if string(tn) == "title" {
				depth++
			}
		}
	}
}
func (item *AnimeConventionItem) readCity(t *html.Tokenizer) {
	t.Next()
	item.city = string(t.Text())
}
func (item *AnimeConventionItem) readState(t *html.Tokenizer) {
	t.Next()
	item.state = string(t.Text())
}
func (item *AnimeConventionItem) readDescription(t *html.Tokenizer) {
	t.Next()
	item.description = string(t.Text())
}
Example #16
0
File: parser.go Project: influx6/gu
func pullNode(tokens *html.Tokenizer, root *Markup) {
	var node *Markup

	for {
		token := tokens.Next()

		switch token {
		case html.ErrorToken:
			return

		case html.TextToken, html.CommentToken, html.DoctypeToken:
			text := strings.TrimSpace(string(tokens.Text()))

			if text == "" {
				continue
			}

			if token == html.CommentToken {
				text = "<!--" + text + "-->"
			}

			if node != nil {
				NewText(text).Apply(node)
				continue
			}

			NewText(text).Apply(root)
			continue

		case html.StartTagToken, html.EndTagToken, html.SelfClosingTagToken:
			if token == html.EndTagToken {
				node = nil
				return
			}

			tagName, hasAttr := tokens.TagName()

			node = NewMarkup(string(tagName), token == html.SelfClosingTagToken)
			node.Apply(root)

			if hasAttr {
			attrLoop:
				for {
					key, val, more := tokens.TagAttr()

					NewAttr(string(key), string(val)).Apply(node)

					if !more {
						break attrLoop
					}
				}
			}

			if token == html.SelfClosingTagToken {
				continue
			}

			pullNode(tokens, node)
		}
	}
}