Exemple #1
0
func getText(z *html.Tokenizer) string {
	tt := z.Next()
	switch tt {
	case html.ErrorToken:
		panic(z.Err())
	case html.TextToken:
		return string(z.Text())
	}
	return ""
}
func (item *AnimeConventionItem) Parse(t *html.Tokenizer) {
	for {
		label := t.Next()
		switch label {
		case html.ErrorToken:
			fmt.Errorf("%v\n", t.Err())
			return
		case html.TextToken:
			switch string(t.Text()) {
			case "Advance Rates:":
				//fmt.Println("rate")
				item.readadvanceRate(t)
			case "At-Door Rates:":
				item.readatDoorRate(t)
			}
		case html.StartTagToken, html.EndTagToken, html.SelfClosingTagToken:
			tag, hasmore := t.TagName()
			if strings.EqualFold(string(tag), "big") {
				item.readResgiterNowurl(t)
			} else if hasmore {
				key, val, hasmore := t.TagAttr()
				if strings.EqualFold(string(key), "itemprop") {
					//fmt.Println(string(val))
					switch string(val) {
					case "description":
						item.readDescription(t)
					case "latitude":
						item.readLatitude(t)
					case "longitude":
						item.readLongitude(t)
					case "startDate":
						item.readStartDate(t)
					case "endDate":
						item.readEndDate(t)
					case "location":
						item.readLocation(t)
					case "addressLocality":
						item.readCity(t)
					case "addressRegion":
						item.readState(t)
					case "addressCountry":
						item.readCountry(t, hasmore)
					case "name":
						item.readNameAndLink(t)
					}
				}
			}
		}
	}
}
func parseFragment(z *html.Tokenizer) (f Fragment, dependencies []*FetchDefinition, err error) {
	attrs := make([]html.Attribute, 0, 10)
	dependencies = make([]*FetchDefinition, 0, 0)

	buff := bytes.NewBuffer(nil)
forloop:
	for {
		tt := z.Next()
		tag, _ := z.TagName()
		raw := byteCopy(z.Raw()) // create a copy here, because readAttributes modifies z.Raw, if attributes contain an &
		attrs = readAttributes(z, attrs)

		switch {
		case tt == html.ErrorToken:
			if z.Err() != io.EOF {
				return nil, nil, z.Err()
			}
			break forloop
		case tt == html.StartTagToken || tt == html.SelfClosingTagToken:
			if string(tag) == UicInclude {
				if replaceTextStart, replaceTextEnd, err := getInclude(z, attrs); err != nil {
					return nil, nil, err
				} else {
					fmt.Fprintf(buff, replaceTextStart)
					// Enhancement: WriteOut sub tree, to allow alternative content
					//              for optional includes.
					fmt.Fprintf(buff, replaceTextEnd)
					continue
				}
			}

			if skipSubtreeIfUicRemove(z, tt, string(tag), attrs) {
				continue
			}

		case tt == html.EndTagToken:
			if string(tag) == UicFragment || string(tag) == UicTail {
				break forloop
			}
		}
		buff.Write(raw)
	}

	return StringFragment(buff.String()), dependencies, nil
}
func (parser *HtmlContentParser) parseHead(z *html.Tokenizer, c *MemoryContent) error {
	attrs := make([]html.Attribute, 0, 10)
	headBuff := bytes.NewBuffer(nil)

forloop:
	for {
		tt := z.Next()
		tag, _ := z.TagName()
		raw := byteCopy(z.Raw()) // create a copy here, because readAttributes modifies z.Raw, if attributes contain an &
		attrs = readAttributes(z, attrs)

		switch {
		case tt == html.ErrorToken:
			if z.Err() != io.EOF {
				return z.Err()
			}
			break forloop
		case tt == html.StartTagToken || tt == html.SelfClosingTagToken:
			if skipSubtreeIfUicRemove(z, tt, string(tag), attrs) {
				continue
			}
			if string(tag) == "script" && attrHasValue(attrs, "type", ScriptTypeMeta) {
				if err := parseMetaJson(z, c); err != nil {
					return err
				}
				continue
			}
		case tt == html.EndTagToken:
			if string(tag) == "head" {
				break forloop
			}
		}
		headBuff.Write(raw)
	}

	s := headBuff.String()
	st := strings.Trim(s, " \n")
	if len(st) > 0 {
		c.head = StringFragment(st)
	}
	return nil
}
func skipCompleteTag(z *html.Tokenizer, tagName string) error {
forloop:
	for {
		tt := z.Next()
		tag, _ := z.TagName()
		switch {
		case tt == html.ErrorToken:
			if z.Err() != io.EOF {
				return z.Err()
			}
			break forloop
		case tt == html.EndTagToken:
			tagAsString := string(tag)
			if tagAsString == tagName {
				break forloop
			}
		}
	}
	return nil
}
Exemple #6
0
func getTextR(z *html.Tokenizer) string {
	r := ""
	depth := 1
	for {
		tt := z.Next()
		switch tt {
		case html.ErrorToken:
			panic(z.Err())
		case html.TextToken:
			r += string(z.Text())
		case html.StartTagToken:
			tn, _ := z.TagName()
			tns := strings.ToLower(string(tn))
			switch tns {
			case "div":
				r += "\r"
				depth++
			case "span":
				r += "'"
				depth++
			}
		case html.EndTagToken:
			tn, _ := z.TagName()
			tns := strings.ToLower(string(tn))
			switch tns {
			case "div":
				depth--
			case "span":
				r += "'"
				depth--
			}
		}
		if depth == 0 {
			return r
		}
	}
}
func (parser *HtmlContentParser) parseBody(z *html.Tokenizer, c *MemoryContent) error {
	attrs := make([]html.Attribute, 0, 10)
	bodyBuff := bytes.NewBuffer(nil)

	attrs = readAttributes(z, attrs)
	if len(attrs) > 0 {
		c.bodyAttributes = StringFragment(joinAttrs(attrs))
	}

forloop:
	for {
		tt := z.Next()
		tag, _ := z.TagName()
		raw := byteCopy(z.Raw()) // create a copy here, because readAttributes modifies z.Raw, if attributes contain an &
		attrs = readAttributes(z, attrs)

		switch {
		case tt == html.ErrorToken:
			if z.Err() != io.EOF {
				return z.Err()
			}
			break forloop
		case tt == html.StartTagToken || tt == html.SelfClosingTagToken:
			if skipSubtreeIfUicRemove(z, tt, string(tag), attrs) {
				continue
			}
			if string(tag) == UicFragment {
				if f, deps, err := parseFragment(z); err != nil {
					return err
				} else {
					c.body[getFragmentName(attrs)] = f
					for _, dep := range deps {
						c.requiredContent[dep.URL] = dep
					}
				}
				continue
			}
			if string(tag) == UicTail {
				if f, deps, err := parseFragment(z); err != nil {
					return err
				} else {
					c.tail = f
					for _, dep := range deps {
						c.requiredContent[dep.URL] = dep
					}
				}
				continue
			}
			if string(tag) == UicFetch {
				if fd, err := getFetch(z, attrs); err != nil {
					return err
				} else {
					c.requiredContent[fd.URL] = fd
					continue
				}
			}
			if string(tag) == UicInclude {
				if replaceTextStart, replaceTextEnd, err := getInclude(z, attrs); err != nil {
					return err
				} else {
					bodyBuff.WriteString(replaceTextStart)
					// Enhancement: WriteOut sub tree, to allow alternative content
					//              for optional includes.
					bodyBuff.WriteString(replaceTextEnd)
					continue
				}
			}

		case tt == html.EndTagToken:
			if string(tag) == "body" {
				break forloop
			}
		}
		bodyBuff.Write(raw)
	}

	s := bodyBuff.String()
	if _, defaultFragmentExists := c.body[""]; !defaultFragmentExists {
		if st := strings.Trim(s, " \n"); len(st) > 0 {
			c.body[""] = StringFragment(st)
		}
	}

	return nil
}
Exemple #8
0
func nextToken(tokenizer *html.Tokenizer) error {
	if t := tokenizer.Next(); t == html.ErrorToken {
		return tokenizer.Err()
	}
	return nil
}