Example #1
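// parseRef scans an HTML fragment for <blockquote cite="#comment-N"> tags
// and returns the referenced comment IDs, deduplicated via a map.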
func parseRef(data string) []cid {
	var m = map[cid]struct{}{}
	t := html.NewTokenizer(strings.NewReader(data))
	for {
		t.Next()
		token := t.Token()
		if token.Type == html.ErrorToken {
			break
		}
		if token.Type == html.StartTagToken &&
			token.DataAtom == atom.Blockquote {
			for _, attr := range token.Attr {
				if attr.Key == "cite" {
					if s := attr.Val; strings.HasPrefix(s, "#comment-") {
						id, err := strconv.ParseUint(s[len("#comment-"):], 10, 32)
						if err != nil {
							logger.Println("notification:", err.Error())
							continue
						}
						m[cid(id)] = struct{}{}
					}
					break
				}
			}
		}
	}
	var ret []cid
	for k := range m {
		ret = append(ret, k)
	}
	return ret
}
Example #2
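// findProviderFromHeadLink scans the <head> of an HTML document for
// <link rel="openid2.provider"> and <link rel="openid2.local_id"> tags
// and returns their href values as the OpenID endpoint and local ID.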
func findProviderFromHeadLink(input io.Reader) (opEndpoint, opLocalId string, err error) {
	tokenizer := html.NewTokenizer(input)
	inHead := false
	for {
		tt := tokenizer.Next()
		switch tt {
		case html.ErrorToken:
			// Even if the document is malformed after we found a
			// valid <link> tag, ignore and let's be happy with our
			// openid2.provider and potentially openid2.local_id as well.
			if len(opEndpoint) > 0 {
				return
			}
			return "", "", tokenizer.Err()
		case html.StartTagToken, html.EndTagToken:
			tk := tokenizer.Token()
			if tk.Data == "head" {
				if tt == html.StartTagToken {
					inHead = true
				} else {
					if len(opEndpoint) > 0 {
						return
					}
					return "", "", errors.New(
						"LINK with rel=openid2.provider not found")
				}
			} else if inHead && tk.Data == "link" {
				provider := false
				localId := false
				href := ""
				for _, attr := range tk.Attr {
					if attr.Key == "rel" {
						if attr.Val == "openid2.provider" {
							provider = true
						} else if attr.Val == "openid2.local_id" {
							localId = true
						}
					} else if attr.Key == "href" {
						href = attr.Val
					}
				}
				if provider && !localId && len(href) > 0 {
					opEndpoint = href
				} else if !provider && localId && len(href) > 0 {
					opLocalId = href
				}
			}
		}
	}
	// At this point we should probably have returned either from
	// a closing </head> or a tokenizer error (no </head> found).
	// But just in case.
	if len(opEndpoint) > 0 {
		return
	}
	return "", "", errors.New("LINK rel=openid2.provider not found")
}
Example #3
// findMetaXrdsLocation searches the document <head> for
// <meta http-equiv="X-XRDS-Location" content="..."> and returns the content value.
func findMetaXrdsLocation(input io.Reader) (location string, err error) {
	tokenizer := html.NewTokenizer(input)
	inHead := false
	for {
		tt := tokenizer.Next()
		switch tt {
		case html.ErrorToken:
			return "", tokenizer.Err()
		case html.StartTagToken, html.EndTagToken:
			tk := tokenizer.Token()
			if tk.Data == "head" {
				if tt == html.StartTagToken {
					inHead = true
				} else {
					return "", errors.New("Meta X-XRDS-Location not found")
				}
			} else if inHead && tk.Data == "meta" {
				ok := false
				content := ""
				for _, attr := range tk.Attr {
					if attr.Key == "http-equiv" &&
						attr.Val == "X-XRDS-Location" {
						ok = true
					} else if attr.Key == "content" {
						content = attr.Val
					}
				}
				if ok && len(content) > 0 {
					return content, nil
				}
			}
		}
	}
	return "", errors.New("Meta X-XRDS-Location not found")
}
Example #4
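// htmlFilter wraps content in a <p> element, passes through only tags whose
// name and attributes are allowed by validAtom, escapes disallowed or
// unmatched tags, and closes any tags still left open on the stack.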
func htmlFilter(content string) (string, error) {
	var ret string
	ret = "<p>"
	t := html.NewTokenizer(strings.NewReader(content))
	stack := make([]atom.Atom, 0)
L:
	for {
		t.Next()
		token := t.Token()
		str := token.String()
		switch token.Type {
		case html.StartTagToken, html.SelfClosingTagToken:
			ans := false
			if attrMap, ex := validAtom[token.DataAtom]; ex {
				ans = true
				for _, attr := range token.Attr {
					if _, ex := attrMap[attr.Key]; !ex {
						ans = false
						break
					}
				}
			}
			if ans {
				stack = append(stack, token.DataAtom)
				ret += str
			} else {
				ret += html.EscapeString(str)
			}
		case html.EndTagToken:
			top := len(stack) - 1
			for top >= 0 && stack[top] != token.DataAtom {
				top--
			}
			if top == -1 {
				ret += html.EscapeString(str)
			} else {
				stack = stack[0:top]
				ret += str
			}
		case html.TextToken:
			ret += str
		case html.ErrorToken:
			break L
		}
	}
	if err := t.Err(); err != io.EOF {
		return "", err
	}
	for len(stack) > 0 {
		ret += "</" + stack[len(stack)-1].String() + ">"
		stack = stack[:len(stack)-1]
	}
	ret += "</p>"
	return ret, nil
}
Example #5
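// TestNextTextFilter feeds a nested HTML document through NextTextFilter and
// checks that only the requested elements ("p", "a", "sup") and their text
// content appear in each returned node.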
func TestNextTextFilter(t *testing.T) {
	src := `<html>
	<p>
		<a name="foo"/>
		<small>
			<font face="Arial">
				Foo
				<sup>
					<u>
						<b>
							Bar
						</b>
					</u>
				</sup>
			</font>
		</small>
		<a href="/path/to/somewhere">
			<i>
				Baz
			</i>
		</a>
	</p>

	<p>
		<span>
			Ding
		</span>
	</p>

</html>`

	expected := []string{
		"<p><a/>Foo<sup>Bar</sup><a>Baz</a></p>",
		"<p>Ding</p>",
	}

	r := bytes.NewBufferString(src)
	d := html.NewTokenizer(r)

	for _, v := range expected {
		node, err := NextTextFilter(d, "p", "a", "sup")
		if err != nil {
			t.Fatal(err)
		}
		if node.String() != v {
			t.Errorf("expected %q, got %q", v, node.String())
		}
	}
}
Example #6
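// process replaces <latex>...</latex> blocks in content with <img> tags whose
// src is produced by genLaTeX from the unescaped block source; everything
// outside those blocks is passed through unchanged.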
func process(content string) string {
	ret := ""
	t := html.NewTokenizer(strings.NewReader(content))
	latex := false
	latexSrc := ""
L:
	for {
		t.Next()
		token := t.Token()
		str := token.String()
		if latex {
			switch token.Type {
			case html.ErrorToken:
				break L
			case html.EndTagToken:
				if token.Data == "latex" {
					latex = false
					ret += fmt.Sprintf("<img src=\"%s\" alt=\"%s\"/>", genLaTeX(html.UnescapeString(latexSrc)), latexSrc)
					latexSrc = ""
				} else {
					latexSrc += str
				}
			default:
				latexSrc += str
			}
		} else {
			switch token.Type {
			case html.ErrorToken:
				break L
			case html.StartTagToken:
				if token.Data == "latex" {
					latex = true
				} else {
					ret += str
				}
			default:
				ret += str
			}
		}
	}
	return ret
}
Example #7
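// extractLinks tokenizes the response body and collects the href value of
// every <a> tag it finds.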
func extractLinks(resp io.Reader) []string {
	// Grow the result with append; the capacity is only a pre-allocation hint.
	links := make([]string, 0, 1000)
	anchorTag := []byte{'a'}
	tkzer := html.NewTokenizer(resp)
	var more bool
	var value []byte
	var key []byte
	eof := false
	for {
		if eof {
			break
		}
		switch tkzer.Next() {
		case html.ErrorToken:
			// io.EOF marks the end of the document; any other tokenizer
			// error also ends the scan instead of looping on the error token.
			eof = true
		case html.StartTagToken:
			tag, hasAttr := tkzer.TagName()
			if hasAttr && bytes.Equal(anchorTag, tag) {
				more = true
				for more {
					key, value, more = tkzer.TagAttr()
					if string(key) == "href" {
						more = false
						fmt.Printf("%s %d\n", string(value), len(links))
						links = append(links, string(value))
					}
				}
			}
		}
	}
	return links
}