// getInclude builds the placeholder markers for an include element described
// by attrs.
//
// The "src" attribute is mandatory; its value is trimmed of surrounding
// whitespace and a single leading "#" is dropped. The optional "required"
// attribute is parsed with strconv.ParseBool and defaults to false.
//
// For a required include only startMarker ("§[> src]§") is produced and
// endMarker is empty. For an optional include startMarker is "§[#> src]§" and
// endMarker is "§[/src]§".
//
// z is only used to quote the raw token in error messages. An error is
// returned when "src" is missing or "required" is not a valid boolean.
func getInclude(z *html.Tokenizer, attrs []html.Attribute) (startMarker, endMarker string, err error) {
	url, hasURL := getAttr(attrs, "src")
	if !hasURL {
		return "", "", fmt.Errorf("include definition without src %s", z.Raw())
	}
	srcString := strings.TrimPrefix(strings.TrimSpace(url.Val), "#")

	required := false
	if r, hasRequired := getAttr(attrs, "required"); hasRequired {
		requiredBool, parseErr := strconv.ParseBool(r.Val)
		if parseErr != nil {
			return "", "", fmt.Errorf("error parsing bool in %s: %s", z.Raw(), parseErr.Error())
		}
		required = requiredBool
	}

	if required {
		return fmt.Sprintf("§[> %s]§", srcString), "", nil
	}
	return fmt.Sprintf("§[#> %s]§", srcString), fmt.Sprintf("§[/%s]§", srcString), nil
}
Example #2
0
File: atom.go Project: lufia/news
// buildHTML concatenates the raw bytes of the tokenizer's current and
// following tokens into a string.
//
// If the current token is a "div" tag, its raw bytes are treated as a wrapper:
// the returned string then excludes that opening token as well as the last
// token written before the stream ended. Without a leading div everything that
// was collected is returned.
//
// Iteration is driven by nextToken (defined elsewhere; presumably it advances
// the tokenizer — verify there). The loop stops when it reports io.EOF; any
// other error aborts with an empty string and that error.
func buildHTML(tokenizer *html.Tokenizer) (s string, err error) {
	var out bytes.Buffer

	start := 0
	if name, _ := tokenizer.TagName(); string(name) == "div" {
		wrapper := tokenizer.Raw()
		out.Write(wrapper)
		start = len(wrapper)
		err = nextToken(tokenizer)
	}

	end := start
	for {
		if err == io.EOF {
			break
		}
		if err != nil {
			return "", err
		}
		// Remember where the token about to be written starts, so the final
		// token can be cut off again when a wrapper div was seen.
		end = out.Len()
		if _, writeErr := out.Write(tokenizer.Raw()); writeErr != nil {
			return "", writeErr
		}
		err = nextToken(tokenizer)
	}

	content := out.Bytes()
	if start > 0 {
		content = content[start:end]
	}
	return string(content), nil
}
// parseFragment consumes tokens from z until the closing tag of a UicFragment
// or UicTail element (or EOF) and returns the collected markup as a
// StringFragment.
//
// Raw tokens are copied verbatim, with two exceptions:
//   - a UicInclude start or self-closing tag is replaced by the markers
//     produced by getInclude;
//   - tokens for which skipSubtreeIfUicRemove reports true are dropped.
//
// The returned dependencies slice is always empty in this implementation; it
// is allocated but never appended to. A tokenizer error other than io.EOF, or
// an error from getInclude, is returned with nil results.
func parseFragment(z *html.Tokenizer) (f Fragment, dependencies []*FetchDefinition, err error) {
	attrs := make([]html.Attribute, 0, 10)
	dependencies = make([]*FetchDefinition, 0, 0)

	buff := bytes.NewBuffer(nil)
forloop:
	for {
		tt := z.Next()
		tag, _ := z.TagName()
		raw := byteCopy(z.Raw()) // create a copy here, because readAttributes modifies z.Raw, if attributes contain an &
		attrs = readAttributes(z, attrs)

		switch {
		case tt == html.ErrorToken:
			if z.Err() != io.EOF {
				return nil, nil, z.Err()
			}
			break forloop
		case tt == html.StartTagToken || tt == html.SelfClosingTagToken:
			if string(tag) == UicInclude {
				replaceTextStart, replaceTextEnd, includeErr := getInclude(z, attrs)
				if includeErr != nil {
					return nil, nil, includeErr
				}
				// Write the markers as plain data. They embed the include's
				// src value, so using them as a format string would mangle
				// any '%' they contain.
				buff.WriteString(replaceTextStart)
				// Enhancement: WriteOut sub tree, to allow alternative content
				//              for optional includes.
				buff.WriteString(replaceTextEnd)
				continue
			}

			if skipSubtreeIfUicRemove(z, tt, string(tag), attrs) {
				continue
			}

		case tt == html.EndTagToken:
			if string(tag) == UicFragment || string(tag) == UicTail {
				break forloop
			}
		}
		buff.Write(raw)
	}

	return StringFragment(buff.String()), dependencies, nil
}
// parseHead consumes tokens from z up to the closing </head> tag (or EOF) and
// stores the collected markup in c.head.
//
// Raw tokens are copied verbatim except for those dropped by
// skipSubtreeIfUicRemove and for <script> elements whose "type" attribute
// matches ScriptTypeMeta, which are handed to parseMetaJson instead of being
// copied. The collected text is trimmed of leading and trailing spaces and
// newlines; c.head is only assigned when something remains.
//
// A tokenizer error other than io.EOF, or an error from parseMetaJson, is
// returned.
func (parser *HtmlContentParser) parseHead(z *html.Tokenizer, c *MemoryContent) error {
	var (
		attrs = make([]html.Attribute, 0, 10)
		head  bytes.Buffer
	)

scan:
	for {
		tokenType := z.Next()
		name, _ := z.TagName()
		// Snapshot the raw token first: readAttributes modifies z.Raw when
		// attributes contain an '&'.
		rawToken := byteCopy(z.Raw())
		attrs = readAttributes(z, attrs)

		switch tokenType {
		case html.ErrorToken:
			if readErr := z.Err(); readErr != io.EOF {
				return readErr
			}
			break scan

		case html.StartTagToken, html.SelfClosingTagToken:
			if skipSubtreeIfUicRemove(z, tokenType, string(name), attrs) {
				continue
			}
			if string(name) == "script" && attrHasValue(attrs, "type", ScriptTypeMeta) {
				if err := parseMetaJson(z, c); err != nil {
					return err
				}
				continue
			}

		case html.EndTagToken:
			if string(name) == "head" {
				break scan
			}
		}
		head.Write(rawToken)
	}

	if trimmed := strings.Trim(head.String(), " \n"); len(trimmed) > 0 {
		c.head = StringFragment(trimmed)
	}
	return nil
}
// parseMetaJson reads the body of a meta JSON <script> element from z and
// unmarshals it into c.meta.
//
// The next token must be a text token holding the JSON document, and the
// token after that must be the closing </script> tag. An error is returned if
// either expectation is not met or if the JSON cannot be decoded.
func parseMetaJson(z *html.Tokenizer, c *MemoryContent) error {
	if tokenType := z.Next(); tokenType != html.TextToken {
		return fmt.Errorf("expected text node for meta json, but found %v, (%s)", tokenType.String(), z.Raw())
	}

	if err := json.Unmarshal(z.Text(), &c.meta); err != nil {
		return fmt.Errorf("error while parsing json from meta json element: %v", err.Error())
	}

	closing := z.Next()
	name, _ := z.TagName()
	if closing == html.EndTagToken && string(name) == "script" {
		return nil
	}
	return fmt.Errorf("Tag not properly ended. Expected </script>, but found %s", z.Raw())
}
Example #6
0
// parseToken reads the next token from tokenizer and attaches the element it
// represents to parent within htmlDoc (via appendElement). Start tags recurse
// so that following tokens are attached to the newly created element until
// its end tag is seen.
//
// It returns three values:
//   - whether an error token (including end of input) was encountered;
//   - whether the token was an end tag, i.e. the current parent ended;
//   - the name of an end tag that setEndTagRaw could not match against the
//     current element ("" when there is none), so that the caller can try to
//     match it further up.
//
// Text tokens consisting only of whitespace are ignored. Doctype,
// self-closing and comment tokens are appended as childless tag elements.
func parseToken(tokenizer *html.Tokenizer, htmlDoc *htmlDocument, parent *tagElement) (bool, bool, string) {
	switch tokenizer.Next() {
	case html.ErrorToken:
		return true, false, ""

	case html.TextToken:
		if content := string(tokenizer.Text()); strings.TrimSpace(content) != "" {
			appendElement(htmlDoc, parent, &textElement{text: content})
		}

	case html.StartTagToken:
		node := &tagElement{tagName: getTagName(tokenizer), startTagRaw: string(tokenizer.Raw())}
		appendElement(htmlDoc, parent, node)
		for closed := false; !closed; {
			failed, ended, pending := parseToken(tokenizer, htmlDoc, node)
			switch {
			case failed:
				return true, false, ""
			case ended && pending != "":
				// The end tag did not belong to node; pass it upwards.
				return false, false, pending
			case ended:
				closed = true
			case pending != "":
				// A descendant left an unmatched end tag; try it on node.
				return false, false, setEndTagRaw(tokenizer, node, pending)
			}
		}

	case html.EndTagToken:
		return false, true, setEndTagRaw(tokenizer, parent, getTagName(tokenizer))

	case html.DoctypeToken, html.SelfClosingTagToken, html.CommentToken:
		leaf := &tagElement{tagName: getTagName(tokenizer), startTagRaw: string(tokenizer.Raw())}
		appendElement(htmlDoc, parent, leaf)
	}
	return false, false, ""
}
// getFetch builds a FetchDefinition from the attributes of a fetch element.
//
// Recognised attributes:
//   - "src" (mandatory): trimmed and stored as URL;
//   - "name": stored as Name; when absent, Name is derived via urlToName(URL);
//   - "timeout": an integer number of milliseconds, stored as Timeout;
//   - "required": a boolean as accepted by strconv.ParseBool;
//   - "discoveredby": passed to the definition's DiscoveredBy method.
//
// z is only used to quote the raw token in error messages. An error is
// returned when "src" is missing or when "timeout" or "required" cannot be
// parsed.
func getFetch(z *html.Tokenizer, attrs []html.Attribute) (*FetchDefinition, error) {
	src, hasSrc := getAttr(attrs, "src")
	if !hasSrc {
		return nil, fmt.Errorf("include definition without src %s", z.Raw())
	}

	fd := &FetchDefinition{}
	fd.URL = strings.TrimSpace(src.Val)

	if name, hasName := getAttr(attrs, "name"); !hasName {
		fd.Name = urlToName(fd.URL)
	} else {
		fd.Name = name.Val
	}

	if timeout, hasTimeout := getAttr(attrs, "timeout"); hasTimeout {
		millis, err := strconv.Atoi(timeout.Val)
		if err != nil {
			return nil, fmt.Errorf("error parsing timeout in %s: %s", z.Raw(), err.Error())
		}
		fd.Timeout = time.Duration(millis) * time.Millisecond
	}

	if required, hasRequired := getAttr(attrs, "required"); hasRequired {
		flag, err := strconv.ParseBool(required.Val)
		if err != nil {
			return nil, fmt.Errorf("error parsing bool in %s: %s", z.Raw(), err.Error())
		}
		fd.Required = flag
	}

	if discoverer, found := getAttr(attrs, "discoveredby"); found {
		fd.DiscoveredBy(discoverer.Val)
	}

	return fd, nil
}
// parseBody consumes tokens from z up to the closing </body> tag (or EOF) and
// fills c with what it finds.
//
// The attributes of the token z is positioned on when the function is entered
// are read first and, if there are any, stored joined in c.bodyAttributes.
// While scanning:
//   - tokens for which skipSubtreeIfUicRemove reports true are dropped;
//   - a UicFragment element is parsed by parseFragment and stored in c.body
//     under the name returned by getFragmentName;
//   - a UicTail element is parsed by parseFragment and stored in c.tail;
//   - a UicFetch element is turned into a FetchDefinition by getFetch and
//     registered in c.requiredContent under its URL;
//   - a UicInclude element is replaced by the markers from getInclude;
//   - every other token is copied verbatim.
//
// Dependencies returned by parseFragment are registered in c.requiredContent
// as well. Whatever was copied outside of named elements becomes the default
// fragment c.body[""], after trimming spaces and newlines, unless a default
// fragment already exists or nothing remains.
//
// A tokenizer error other than io.EOF, or an error from one of the helpers,
// is returned.
func (parser *HtmlContentParser) parseBody(z *html.Tokenizer, c *MemoryContent) error {
	attrs := make([]html.Attribute, 0, 10)
	var body bytes.Buffer

	attrs = readAttributes(z, attrs)
	if len(attrs) > 0 {
		c.bodyAttributes = StringFragment(joinAttrs(attrs))
	}

scan:
	for {
		tokenType := z.Next()
		name, _ := z.TagName()
		// Snapshot the raw token first: readAttributes modifies z.Raw when
		// attributes contain an '&'.
		rawToken := byteCopy(z.Raw())
		attrs = readAttributes(z, attrs)

		switch tokenType {
		case html.ErrorToken:
			if z.Err() != io.EOF {
				return z.Err()
			}
			break scan

		case html.StartTagToken, html.SelfClosingTagToken:
			tagName := string(name)
			if skipSubtreeIfUicRemove(z, tokenType, tagName, attrs) {
				continue
			}
			if tagName == UicFragment {
				fragment, deps, err := parseFragment(z)
				if err != nil {
					return err
				}
				c.body[getFragmentName(attrs)] = fragment
				for _, dep := range deps {
					c.requiredContent[dep.URL] = dep
				}
				continue
			}
			if tagName == UicTail {
				fragment, deps, err := parseFragment(z)
				if err != nil {
					return err
				}
				c.tail = fragment
				for _, dep := range deps {
					c.requiredContent[dep.URL] = dep
				}
				continue
			}
			if tagName == UicFetch {
				fd, err := getFetch(z, attrs)
				if err != nil {
					return err
				}
				c.requiredContent[fd.URL] = fd
				continue
			}
			if tagName == UicInclude {
				startMarker, endMarker, err := getInclude(z, attrs)
				if err != nil {
					return err
				}
				body.WriteString(startMarker)
				// Enhancement: WriteOut sub tree, to allow alternative content
				//              for optional includes.
				body.WriteString(endMarker)
				continue
			}

		case html.EndTagToken:
			if string(name) == "body" {
				break scan
			}
		}
		body.Write(rawToken)
	}

	collected := body.String()
	if _, defaultFragmentExists := c.body[""]; defaultFragmentExists {
		return nil
	}
	if trimmed := strings.Trim(collected, " \n"); len(trimmed) > 0 {
		c.body[""] = StringFragment(trimmed)
	}
	return nil
}
Example #9
-1
// setEndTagRaw records the tokenizer's current raw token as parent's end tag
// when parent is non-nil and its tag name equals tagName, and returns "".
// Otherwise parent is left untouched and tagName is returned unchanged, which
// tells the caller that the end tag is still unmatched.
func setEndTagRaw(tokenizer *html.Tokenizer, parent *tagElement, tagName string) string {
	if parent == nil || parent.tagName != tagName {
		return tagName
	}
	parent.endTagRaw = string(tokenizer.Raw())
	return ""
}