func getText(z *html.Tokenizer) string { tt := z.Next() switch tt { case html.ErrorToken: panic(z.Err()) case html.TextToken: return string(z.Text()) } return "" }
func (item *AnimeConventionItem) Parse(t *html.Tokenizer) { for { label := t.Next() switch label { case html.ErrorToken: fmt.Errorf("%v\n", t.Err()) return case html.TextToken: switch string(t.Text()) { case "Advance Rates:": //fmt.Println("rate") item.readadvanceRate(t) case "At-Door Rates:": item.readatDoorRate(t) } case html.StartTagToken, html.EndTagToken, html.SelfClosingTagToken: tag, hasmore := t.TagName() if strings.EqualFold(string(tag), "big") { item.readResgiterNowurl(t) } else if hasmore { key, val, hasmore := t.TagAttr() if strings.EqualFold(string(key), "itemprop") { //fmt.Println(string(val)) switch string(val) { case "description": item.readDescription(t) case "latitude": item.readLatitude(t) case "longitude": item.readLongitude(t) case "startDate": item.readStartDate(t) case "endDate": item.readEndDate(t) case "location": item.readLocation(t) case "addressLocality": item.readCity(t) case "addressRegion": item.readState(t) case "addressCountry": item.readCountry(t, hasmore) case "name": item.readNameAndLink(t) } } } } } }
func parseFragment(z *html.Tokenizer) (f Fragment, dependencies []*FetchDefinition, err error) { attrs := make([]html.Attribute, 0, 10) dependencies = make([]*FetchDefinition, 0, 0) buff := bytes.NewBuffer(nil) forloop: for { tt := z.Next() tag, _ := z.TagName() raw := byteCopy(z.Raw()) // create a copy here, because readAttributes modifies z.Raw, if attributes contain an & attrs = readAttributes(z, attrs) switch { case tt == html.ErrorToken: if z.Err() != io.EOF { return nil, nil, z.Err() } break forloop case tt == html.StartTagToken || tt == html.SelfClosingTagToken: if string(tag) == UicInclude { if replaceTextStart, replaceTextEnd, err := getInclude(z, attrs); err != nil { return nil, nil, err } else { fmt.Fprintf(buff, replaceTextStart) // Enhancement: WriteOut sub tree, to allow alternative content // for optional includes. fmt.Fprintf(buff, replaceTextEnd) continue } } if skipSubtreeIfUicRemove(z, tt, string(tag), attrs) { continue } case tt == html.EndTagToken: if string(tag) == UicFragment || string(tag) == UicTail { break forloop } } buff.Write(raw) } return StringFragment(buff.String()), dependencies, nil }
func (parser *HtmlContentParser) parseHead(z *html.Tokenizer, c *MemoryContent) error { attrs := make([]html.Attribute, 0, 10) headBuff := bytes.NewBuffer(nil) forloop: for { tt := z.Next() tag, _ := z.TagName() raw := byteCopy(z.Raw()) // create a copy here, because readAttributes modifies z.Raw, if attributes contain an & attrs = readAttributes(z, attrs) switch { case tt == html.ErrorToken: if z.Err() != io.EOF { return z.Err() } break forloop case tt == html.StartTagToken || tt == html.SelfClosingTagToken: if skipSubtreeIfUicRemove(z, tt, string(tag), attrs) { continue } if string(tag) == "script" && attrHasValue(attrs, "type", ScriptTypeMeta) { if err := parseMetaJson(z, c); err != nil { return err } continue } case tt == html.EndTagToken: if string(tag) == "head" { break forloop } } headBuff.Write(raw) } s := headBuff.String() st := strings.Trim(s, " \n") if len(st) > 0 { c.head = StringFragment(st) } return nil }
func skipCompleteTag(z *html.Tokenizer, tagName string) error { forloop: for { tt := z.Next() tag, _ := z.TagName() switch { case tt == html.ErrorToken: if z.Err() != io.EOF { return z.Err() } break forloop case tt == html.EndTagToken: tagAsString := string(tag) if tagAsString == tagName { break forloop } } } return nil }
func getTextR(z *html.Tokenizer) string { r := "" depth := 1 for { tt := z.Next() switch tt { case html.ErrorToken: panic(z.Err()) case html.TextToken: r += string(z.Text()) case html.StartTagToken: tn, _ := z.TagName() tns := strings.ToLower(string(tn)) switch tns { case "div": r += "\r" depth++ case "span": r += "'" depth++ } case html.EndTagToken: tn, _ := z.TagName() tns := strings.ToLower(string(tn)) switch tns { case "div": depth-- case "span": r += "'" depth-- } } if depth == 0 { return r } } }
func (parser *HtmlContentParser) parseBody(z *html.Tokenizer, c *MemoryContent) error { attrs := make([]html.Attribute, 0, 10) bodyBuff := bytes.NewBuffer(nil) attrs = readAttributes(z, attrs) if len(attrs) > 0 { c.bodyAttributes = StringFragment(joinAttrs(attrs)) } forloop: for { tt := z.Next() tag, _ := z.TagName() raw := byteCopy(z.Raw()) // create a copy here, because readAttributes modifies z.Raw, if attributes contain an & attrs = readAttributes(z, attrs) switch { case tt == html.ErrorToken: if z.Err() != io.EOF { return z.Err() } break forloop case tt == html.StartTagToken || tt == html.SelfClosingTagToken: if skipSubtreeIfUicRemove(z, tt, string(tag), attrs) { continue } if string(tag) == UicFragment { if f, deps, err := parseFragment(z); err != nil { return err } else { c.body[getFragmentName(attrs)] = f for _, dep := range deps { c.requiredContent[dep.URL] = dep } } continue } if string(tag) == UicTail { if f, deps, err := parseFragment(z); err != nil { return err } else { c.tail = f for _, dep := range deps { c.requiredContent[dep.URL] = dep } } continue } if string(tag) == UicFetch { if fd, err := getFetch(z, attrs); err != nil { return err } else { c.requiredContent[fd.URL] = fd continue } } if string(tag) == UicInclude { if replaceTextStart, replaceTextEnd, err := getInclude(z, attrs); err != nil { return err } else { bodyBuff.WriteString(replaceTextStart) // Enhancement: WriteOut sub tree, to allow alternative content // for optional includes. bodyBuff.WriteString(replaceTextEnd) continue } } case tt == html.EndTagToken: if string(tag) == "body" { break forloop } } bodyBuff.Write(raw) } s := bodyBuff.String() if _, defaultFragmentExists := c.body[""]; !defaultFragmentExists { if st := strings.Trim(s, " \n"); len(st) > 0 { c.body[""] = StringFragment(st) } } return nil }
func nextToken(tokenizer *html.Tokenizer) error { if t := tokenizer.Next(); t == html.ErrorToken { return tokenizer.Err() } return nil }