func getQ(tknzer html.Tokenizer, ch chan string) { tknzer.Next() tknzer.Next() tknzer.Next() tknzer.Next() ch <- string(tknzer.Text()) }
func getText(z *html.Tokenizer) string { tt := z.Next() switch tt { case html.ErrorToken: panic(z.Err()) case html.TextToken: return string(z.Text()) } return "" }
func (item *AnimeConventionItem) readCountry(t *html.Tokenizer, hasmore bool) { if hasmore { _, val, _ := t.TagAttr() item.country = string(val) return } t.Next() item.country = string(t.Text()) }
func (item *AnimeConventionItem) readNameAndLink(t *html.Tokenizer) { if label := t.Next(); label == html.StartTagToken { _, hasmore := t.TagName() if hasmore { if key, val, _ := t.TagAttr(); strings.EqualFold(string(key), "href") { item.siteURL = string(val) } } } if label := t.Next(); label == html.TextToken { item.name = string(t.Text()) } }
func (item *AnimeConventionItem) Parse(t *html.Tokenizer) { for { label := t.Next() switch label { case html.ErrorToken: fmt.Errorf("%v\n", t.Err()) return case html.TextToken: switch string(t.Text()) { case "Advance Rates:": //fmt.Println("rate") item.readadvanceRate(t) case "At-Door Rates:": item.readatDoorRate(t) } case html.StartTagToken, html.EndTagToken, html.SelfClosingTagToken: tag, hasmore := t.TagName() if strings.EqualFold(string(tag), "big") { item.readResgiterNowurl(t) } else if hasmore { key, val, hasmore := t.TagAttr() if strings.EqualFold(string(key), "itemprop") { //fmt.Println(string(val)) switch string(val) { case "description": item.readDescription(t) case "latitude": item.readLatitude(t) case "longitude": item.readLongitude(t) case "startDate": item.readStartDate(t) case "endDate": item.readEndDate(t) case "location": item.readLocation(t) case "addressLocality": item.readCity(t) case "addressRegion": item.readState(t) case "addressCountry": item.readCountry(t, hasmore) case "name": item.readNameAndLink(t) } } } } } }
func (item *AnimeConventionItem) readLocation(t *html.Tokenizer) { for { if label := t.Next(); label == html.StartTagToken { _, hasmore := t.TagName() if hasmore { if _, val, _ := t.TagAttr(); strings.EqualFold(string(val), "name") { break } } } } if label := t.Next(); label == html.TextToken { item.location = string(t.Text()) } }
func (item *AnimeConventionItem) readRates(t *html.Tokenizer) string { rates := "" for { label := t.Next() if label == html.EndTagToken { val, _ := t.TagName() if strings.EqualFold(string(val), "p") { break } } if label == html.TextToken { rates = strings.Join([]string{rates, string(t.Text())}, "\n") } } return strings.TrimSpace(rates) }
func buildTokenArray(tokenizer *xhtml.Tokenizer) []tagInfo { tokens := []tagInfo{} for tt := tokenizer.Next(); tt != xhtml.ErrorToken; tt = tokenizer.Next() { switch tt { case xhtml.TextToken: txt := string(tokenizer.Text()) if len(tokens) == 0 { info := tagInfo{ raw: txt, } tokens = append(tokens, info) } tn, _ := tokenizer.TagName() key, val, _ := tokenizer.TagAttr() info := tagInfo{ tag: string(tn), key: string(key), val: string(val), txt: txt, } tokens = append(tokens, info) case xhtml.StartTagToken: tn, _ := tokenizer.TagName() key, val, _ := tokenizer.TagAttr() info := tagInfo{ tag: string(tn), key: string(key), val: string(val), } tokens = append(tokens, info) case xhtml.SelfClosingTagToken, xhtml.EndTagToken: tn, _ := tokenizer.TagName() key, val, _ := tokenizer.TagAttr() info := tagInfo{ tag: string(tn), key: string(key), val: string(val), closingTag: true, } tokens = append(tokens, info) } } return tokens }
func parseMetaJson(z *html.Tokenizer, c *MemoryContent) error { tt := z.Next() if tt != html.TextToken { return fmt.Errorf("expected text node for meta json, but found %v, (%s)", tt.String(), z.Raw()) } bytes := z.Text() err := json.Unmarshal(bytes, &c.meta) if err != nil { return fmt.Errorf("error while parsing json from meta json element: %v", err.Error()) } tt = z.Next() tag, _ := z.TagName() if tt != html.EndTagToken || string(tag) != "script" { return fmt.Errorf("Tag not properly ended. Expected </script>, but found %s", z.Raw()) } return nil }
// parseToken consumes one token from tokenizer and records it under
// parent (via appendElement). It returns three values:
//   - errorToken: true when the tokenizer reported an error,
//     telling the caller to abandon parsing;
//   - parentEnded: true when an end tag closed the caller's element;
//   - unsetEndTag: a tag name whose end-tag raw text has not yet been
//     attached to any element, propagated upward until a matching
//     ancestor claims it through setEndTagRaw.
func parseToken(tokenizer *html.Tokenizer, htmlDoc *htmlDocument, parent *tagElement) (bool, bool, string) {
	tokenType := tokenizer.Next()
	switch tokenType {
	case html.ErrorToken:
		return true, false, ""
	case html.TextToken:
		text := string(tokenizer.Text())
		// Whitespace-only text nodes are dropped.
		if strings.TrimSpace(text) == "" {
			break
		}
		textElement := &textElement{text: text}
		appendElement(htmlDoc, parent, textElement)
	case html.StartTagToken:
		tagElement := &tagElement{tagName: getTagName(tokenizer), startTagRaw: string(tokenizer.Raw())}
		appendElement(htmlDoc, parent, tagElement)
		// Recursively parse children until this element is closed.
		for {
			errorToken, parentEnded, unsetEndTag := parseToken(tokenizer, htmlDoc, tagElement)
			if errorToken {
				return true, false, ""
			}
			if parentEnded {
				if unsetEndTag != "" {
					// End tag belongs to an ancestor; keep passing it up.
					return false, false, unsetEndTag
				}
				break
			}
			if unsetEndTag != "" {
				// Try to attach the dangling end tag to this element.
				return false, false, setEndTagRaw(tokenizer, tagElement, unsetEndTag)
			}
		}
	case html.EndTagToken:
		return false, true, setEndTagRaw(tokenizer, parent, getTagName(tokenizer))
	case html.DoctypeToken, html.SelfClosingTagToken, html.CommentToken:
		tagElement := &tagElement{tagName: getTagName(tokenizer), startTagRaw: string(tokenizer.Raw())}
		appendElement(htmlDoc, parent, tagElement)
	}
	return false, false, ""
}
func getTextR(z *html.Tokenizer) string { r := "" depth := 1 for { tt := z.Next() switch tt { case html.ErrorToken: panic(z.Err()) case html.TextToken: r += string(z.Text()) case html.StartTagToken: tn, _ := z.TagName() tns := strings.ToLower(string(tn)) switch tns { case "div": r += "\r" depth++ case "span": r += "'" depth++ } case html.EndTagToken: tn, _ := z.TagName() tns := strings.ToLower(string(tn)) switch tns { case "div": depth-- case "span": r += "'" depth-- } } if depth == 0 { return r } } }
func extractTitleFromTree(z *html.Tokenizer) string { depth := 0 for { tt := z.Next() switch tt { case html.ErrorToken: return "" case html.TextToken: if depth > 0 { title := strings.TrimSpace(string(z.Text())) lower := strings.ToLower(title) if strings.HasPrefix(lower, "imgur") { return "" } return title } case html.StartTagToken: tn, _ := z.TagName() if string(tn) == "title" { depth++ } } } }
func (item *AnimeConventionItem) readCity(t *html.Tokenizer) { t.Next() item.city = string(t.Text()) }
func (item *AnimeConventionItem) readState(t *html.Tokenizer) { t.Next() item.state = string(t.Text()) }
func (item *AnimeConventionItem) readDescription(t *html.Tokenizer) { t.Next() item.description = string(t.Text()) }
func pullNode(tokens *html.Tokenizer, root *Markup) { var node *Markup for { token := tokens.Next() switch token { case html.ErrorToken: return case html.TextToken, html.CommentToken, html.DoctypeToken: text := strings.TrimSpace(string(tokens.Text())) if text == "" { continue } if token == html.CommentToken { text = "<!--" + text + "-->" } if node != nil { NewText(text).Apply(node) continue } NewText(text).Apply(root) continue case html.StartTagToken, html.EndTagToken, html.SelfClosingTagToken: if token == html.EndTagToken { node = nil return } tagName, hasAttr := tokens.TagName() node = NewMarkup(string(tagName), token == html.SelfClosingTagToken) node.Apply(root) if hasAttr { attrLoop: for { key, val, more := tokens.TagAttr() NewAttr(string(key), string(val)).Apply(node) if !more { break attrLoop } } } if token == html.SelfClosingTagToken { continue } pullNode(tokens, node) } } }