Beispiel #1
0
// getVersionFromVimDotOrg gets the latest version number of a vim plugin
// from vim.org.
//
// It takes one argument, the ID of the script on vim.org. It returns the
// version with surrounding whitespace removed, and an error (if any). When
// the page is exhausted (or cannot be tokenized) before a version is found,
// the returned error is whatever the tokenizer reports.
func getVersionFromVimDotOrg(scriptID string) (string, error) {
	url := "http://www.vim.org/scripts/script.php?script_id=" + scriptID
	resp, err := http.Get(url)
	if err != nil {
		return "", err
	}
	defer resp.Body.Close()

	tokenizer := html.NewTokenizer(resp.Body)
	// vim.org doesn't annotate their html entities very well, so we use this
	// variable to keep track of which column in the data table we are looking
	// at. Version numbers are in the second column.
	columnInDataTable := 0
	// This loop exits when we find the version, or the tokenizer runs out of
	// input.
	for {
		switch tokenizer.Next() {
		case html.ErrorToken:
			// We either can't parse the HTML, or we're done. In either case,
			// we haven't found a good version.
			return "", tokenizer.Err()
		case html.StartTagToken:
			token := tokenizer.Token()
			// Only table data cells can be part of the data table.
			if token.DataAtom != atom.Td {
				continue
			}
			for _, attribute := range token.Attr {
				// A cell annotated with class=rowodd or roweven is a field in
				// the data table, so we have seen one more column.
				if attribute.Key == "class" &&
					(strings.Contains(attribute.Val, "rowodd") || strings.Contains(attribute.Val, "roweven")) {
					columnInDataTable++
				}
			}
		case html.EndTagToken:
			// The end of a table row resets the number of data fields seen.
			if tokenizer.Token().DataAtom == atom.Tr {
				columnInDataTable = 0
			}
		case html.TextToken:
			// The second column in the table is the version column. Because
			// vim.org sorts the data table with the most recent version at
			// the top, the first non-blank text found there must be the most
			// recent version.
			if columnInDataTable != 2 {
				continue
			}
			// Return the trimmed text: the emptiness check and the returned
			// value must agree, otherwise callers receive stray whitespace.
			if version := strings.TrimSpace(tokenizer.Token().String()); version != "" {
				return version, nil
			}
		}
	}
}
Beispiel #2
0
// ParseFragment parses a fragment of HTML read from r and returns the
// top-level nodes that were found. When the fragment is the InnerHTML of an
// existing element, that element should be passed as context; otherwise
// context may be nil.
//
// An error is returned if context is not an element node, if its DataAtom and
// Data disagree, or if parsing itself fails.
func ParseFragment(r io.Reader, context *Node) ([]*Node, error) {
	var contextTag string
	if context != nil {
		switch {
		case context.Type != ElementNode:
			return nil, errors.New("html: ParseFragment of non-element Node")
		// Comparing context.DataAtom.String() with context.Data would be too
		// strict: an element whose tag is not a known atom is valid, e.g.
		// DataAtom == 0 together with Data == "tagfromthefuture".
		case context.DataAtom != a.Lookup([]byte(context.Data)):
			return nil, fmt.Errorf("html: inconsistent Node: DataAtom=%q, Data=%q", context.DataAtom, context.Data)
		}
		contextTag = context.DataAtom.String()
	}

	p := &parser{
		tokenizer: NewTokenizerFragment(r, contextTag),
		doc:       &Node{Type: DocumentNode},
		scripting: true,
		fragment:  true,
		context:   context,
	}

	// Seed the document with a synthetic <html> element and make it the only
	// entry on the stack of open elements before choosing the insertion mode.
	htmlRoot := &Node{
		Type:     ElementNode,
		DataAtom: a.Html,
		Data:     a.Html.String(),
	}
	p.doc.AppendChild(htmlRoot)
	p.oe = nodeStack{htmlRoot}
	p.resetInsertionMode()

	// The nearest <form> element among context and its ancestors, if any,
	// becomes the parser's form.
	for ancestor := context; ancestor != nil; ancestor = ancestor.Parent {
		if ancestor.Type != ElementNode || ancestor.DataAtom != a.Form {
			continue
		}
		p.form = ancestor
		break
	}

	if err := p.parse(); err != nil {
		return nil, err
	}

	// With a context element the parsed nodes hang off the synthetic root;
	// without one they are taken from the document node itself.
	holder := htmlRoot
	if context == nil {
		holder = p.doc
	}

	// Detach every child from its holder so the caller receives parentless
	// nodes, preserving document order.
	var nodes []*Node
	child := holder.FirstChild
	for child != nil {
		following := child.NextSibling
		holder.RemoveChild(child)
		nodes = append(nodes, child)
		child = following
	}
	return nodes, nil
}
Beispiel #3
0
// Token returns the current Token, built from the token type most recently
// recorded in the Tokenizer. The result's Data and Attr values are copied
// into strings, so they remain valid after subsequent Next calls.
func (z *Tokenizer) Token() Token {
	tok := Token{Type: z.tt}
	switch z.tt {
	case StartTagToken, SelfClosingTagToken, EndTagToken:
		tagName, hasAttr := z.TagName()
		// Collect attributes until the tokenizer reports there are no more.
		for hasAttr {
			var k, v []byte
			k, v, hasAttr = z.TagAttr()
			tok.Attr = append(tok.Attr, Attribute{Key: atom.String(k), Val: string(v)})
		}
		// Known tag names use the atom's canonical string; unknown ones keep
		// a zero DataAtom and a copy of the raw name.
		tok.DataAtom = atom.Lookup(tagName)
		if tok.DataAtom != 0 {
			tok.Data = tok.DataAtom.String()
		} else {
			tok.Data = string(tagName)
		}
	case TextToken, CommentToken, DoctypeToken:
		tok.Data = string(z.Text())
	}
	return tok
}
Beispiel #4
0
// getVersionFromPyPi gets the version number of the package called name from
// the PyPi website.
//
// It returns a string with the version, and an error (if any). The version is
// taken from the first link on the package page whose href contains
// "version="; if no such link is found before the input ends, the
// tokenizer's error is returned.
func getVersionFromPyPi(name string) (string, error) {
	resp, err := http.Get("https://pypi.python.org/pypi/" + name)
	if err != nil {
		return "", err
	}
	defer resp.Body.Close()

	z := html.NewTokenizer(resp.Body)
	// Scan tokens until a version link turns up or the input is exhausted.
	for {
		switch z.Next() {
		case html.ErrorToken:
			// Either the HTML could not be parsed or we reached the end; in
			// both cases no usable version was found.
			return "", z.Err()
		case html.StartTagToken:
			tag := z.Token()
			if tag.DataAtom != atom.A {
				continue
			}
			for _, attr := range tag.Attr {
				if attr.Key != "href" || !strings.Contains(attr.Val, "version=") {
					continue
				}
				// The version is everything after the last equals sign.
				return attr.Val[strings.LastIndex(attr.Val, "=")+1:], nil
			}
		}
	}
}
Beispiel #5
0
// GetPhpMongoVersion gets the latest version of the php mongodb drivers from
// pecl.
//
// It returns a string with the version, and an error (if any). The version is
// the final path segment of the first link on the package page whose href
// contains "/package/mongo/"; if the input ends before such a link is seen,
// the tokenizer's error is returned.
func GetPhpMongoVersion() (string, error) {
	resp, err := http.Get("http://pecl.php.net/package/mongo")
	if err != nil {
		return "", err
	}
	defer resp.Body.Close()

	z := html.NewTokenizer(resp.Body)
	// Walk the token stream until a matching link is found or input runs out.
	for {
		kind := z.Next()
		if kind == html.ErrorToken {
			// Unparseable HTML or end of input: no version was found.
			return "", z.Err()
		}
		if kind != html.StartTagToken {
			continue
		}

		link := z.Token()
		if link.DataAtom != atom.A {
			continue
		}
		for _, attr := range link.Attr {
			if attr.Key == "href" && strings.Contains(attr.Val, "/package/mongo/") {
				// The version is everything after the last slash.
				lastSlash := strings.LastIndex(attr.Val, "/")
				return attr.Val[lastSlash+1:], nil
			}
		}
	}
}
Beispiel #6
0
// testParseCase runs a single case from the test files and returns an error
// describing the failure if the case does not pass.
//
// text is the HTML to be parsed, want is a dump of the correct parse tree,
// and context is the name of the context node, if any (empty means text is
// parsed as a whole document). A panic raised along the way is recovered and
// reported through the returned error.
func testParseCase(text, want, context string) (err error) {
	defer func() {
		recovered := recover()
		if recovered == nil {
			return
		}
		if asErr, ok := recovered.(error); ok {
			err = asErr
			return
		}
		err = fmt.Errorf("%v", recovered)
	}()

	var doc *Node
	if context != "" {
		// Fragment case: parse relative to a synthetic context element and
		// gather the resulting nodes under a fresh document node.
		fragment, parseErr := ParseFragment(strings.NewReader(text), &Node{
			Type:     ElementNode,
			DataAtom: atom.Lookup([]byte(context)),
			Data:     context,
		})
		if parseErr != nil {
			return parseErr
		}
		doc = &Node{Type: DocumentNode}
		for _, child := range fragment {
			doc.AppendChild(child)
		}
	} else {
		doc, err = Parse(strings.NewReader(text))
		if err != nil {
			return err
		}
	}

	if treeErr := checkTreeConsistency(doc); treeErr != nil {
		return treeErr
	}

	// Compare the parsed tree to the #document section.
	got, err := dump(doc)
	if err != nil {
		return err
	}
	if got != want {
		return fmt.Errorf("got vs want:\n----\n%s----\n%s----", got, want)
	}

	// The render round trip is skipped for fragment cases and for inputs
	// listed in renderTestBlacklist.
	if context != "" || renderTestBlacklist[text] {
		return nil
	}

	// Check that rendering and re-parsing results in an identical tree. The
	// rendered output is streamed to the parser through a pipe; the writer
	// side is closed with Render's result so the reader sees any error.
	pr, pw := io.Pipe()
	go func() {
		pw.CloseWithError(Render(pw, doc))
	}()
	reparsed, err := Parse(pr)
	if err != nil {
		return err
	}
	gotAgain, err := dump(reparsed)
	if err != nil {
		return err
	}
	if got != gotAgain {
		return fmt.Errorf("got vs got1:\n----\n%s----\n%s----", got, gotAgain)
	}

	return nil
}
Beispiel #7
0
// parseForeignContent handles the parser's current token (p.tok) using the
// rules for tokens in foreign content, Section 12.2.5.5. The start-tag path
// expects the top of the stack of open elements (p.oe) to be in the "math" or
// "svg" namespace and panics otherwise.
//
// It returns false only when a start tag breaks out of foreign content, after
// the stack of open elements has been trimmed. For an end tag that reaches an
// element with an empty Namespace it returns the result of p.im(p). In every
// other case it returns true.
// NOTE(review): the meaning of the boolean (presumably "token consumed") and
// of p.im (presumably the current insertion mode) is defined by the caller,
// which is not visible here — confirm.
func parseForeignContent(p *parser) bool {
	switch p.tok.Type {
	case TextToken:
		// Once framesetOK is set, it stays set only while the text consists
		// entirely of characters from whitespaceOrNUL.
		if p.framesetOK {
			p.framesetOK = strings.TrimLeft(p.tok.Data, whitespaceOrNUL) == ""
		}
		// NUL bytes are replaced with U+FFFD before the text is added.
		p.tok.Data = strings.Replace(p.tok.Data, "\x00", "\ufffd", -1)
		p.addText(p.tok.Data)
	case CommentToken:
		p.addChild(&Node{
			Type: CommentNode,
			Data: p.tok.Data,
		})
	case StartTagToken:
		// b reports whether this start tag breaks out of foreign content:
		// either its name is in the breakout table, or it is a <font> tag
		// carrying a color, face or size attribute.
		b := breakout[p.tok.Data]
		if p.tok.DataAtom == a.Font {
		loop:
			for _, attr := range p.tok.Attr {
				switch attr.Key {
				case "color", "face", "size":
					b = true
					break loop
				}
			}
		}
		if b {
			// Trim the stack of open elements so that its top is the nearest
			// element with an empty Namespace or an HTML / MathML text
			// integration point, then report false to the caller.
			for i := len(p.oe) - 1; i >= 0; i-- {
				n := p.oe[i]
				if n.Namespace == "" || htmlIntegrationPoint(n) || mathMLTextIntegrationPoint(n) {
					p.oe = p.oe[:i+1]
					break
				}
			}
			return false
		}
		// Apply the namespace-specific name adjustments of the current top
		// element to the token before an element is created from it.
		switch p.top().Namespace {
		case "math":
			adjustAttributeNames(p.tok.Attr, mathMLAttributeAdjustments)
		case "svg":
			// Adjust SVG tag names. The tokenizer lower-cases tag names, but
			// SVG wants e.g. "foreignObject" with a capital second "O".
			if x := svgTagNameAdjustments[p.tok.Data]; x != "" {
				p.tok.DataAtom = a.Lookup([]byte(x))
				p.tok.Data = x
			}
			adjustAttributeNames(p.tok.Attr, svgAttributeAdjustments)
		default:
			panic("html: bad parser state: unexpected namespace")
		}
		adjustForeignAttributes(p.tok.Attr)
		// Capture the namespace before addElement changes the top of the
		// stack, then copy it onto the new top element.
		namespace := p.top().Namespace
		p.addElement()
		p.top().Namespace = namespace
		if namespace != "" {
			// Don't let the tokenizer go into raw text mode in foreign content
			// (e.g. in an SVG <title> tag).
			p.tokenizer.NextIsNotRawText()
		}
		// A self-closing start tag does not stay open: pop the element that
		// was just added and acknowledge the self-closing flag.
		if p.hasSelfClosingToken {
			p.oe.pop()
			p.acknowledgeSelfClosingTag()
		}
	case EndTagToken:
		// Walk the stack of open elements from the top. Reaching an element
		// with an empty Namespace hands the token to p.im; a case-insensitive
		// name match removes that element and everything above it.
		for i := len(p.oe) - 1; i >= 0; i-- {
			if p.oe[i].Namespace == "" {
				return p.im(p)
			}
			if strings.EqualFold(p.oe[i].Data, p.tok.Data) {
				p.oe = p.oe[:i]
				break
			}
		}
		return true
	default:
		// Ignore the token.
	}
	return true
}