func recurse(p *xml.Parser, name string, m TagMap) os.Error { // depth++ // debugging for { // if depth > maxDepth { // debugging // return nil // } tok, err := p.Token() if err != nil { return err } switch t := tok.(type) { case xml.StartElement: if m[t.Name.Local] == nil { m[t.Name.Local] = TagMap{} } err = recurse(p, t.Name.Local, m[t.Name.Local]) if err != nil { return err } case xml.EndElement: // If ending the element we entered recurse for, return if t.Name.Local == name { return nil } } } return nil }
// Scan XML token stream to find next StartElement. func nextStart(p *xml.Parser) (xml.StartElement, os.Error) { for { t, err := p.Token() if err != nil { log.Fatal("token", err) } switch t := t.(type) { case xml.StartElement: return t, nil } } panic("unreachable") }
func getTagContents(parser *xml.Parser) string { tag, err := parser.Token() if err != nil { return "" } switch dtype := tag.(type) { case xml.CharData: bytes := xml.CharData(dtype) text := string([]byte(bytes)) return text } return "" }
func start(p *xml.Parser, name string, m TagMap) os.Error { for { // if depth > maxDepth { // debugging // return nil // } tok, err := p.Token() if err != nil { return err } switch t := tok.(type) { case xml.StartElement: if t.Name.Local == name { if _, found := m[name]; !found { m[name] = TagMap{} } err = recurse(p, name, m[name]) } } if err != nil { return err } } return nil }
func parse(tok xml.Token, parser *xml.Parser, parent *Node) (node *Node, err os.Error) { node = new(Node) node.Parent = parent st, _ := tok.(xml.StartElement) node.Name = st.Name.Local trace("parsing tag %s", node.Name) node.Attr = []html.Attribute{} for _, attr := range st.Attr { a := html.Attribute{Key: attr.Name.Local, Val: attr.Value} node.Attr = append(node.Attr, a) } // var childs vector.Vector // var chld []*Node for done := false; !done; { var tok xml.Token tok, err = parser.Token() if err != nil { if err == os.EOF { err = nil break } if node.Name == "script" { err = os.ErrorString("Javascript: " + err.String()) } return } switch t := tok.(type) { case xml.StartElement: var ch *Node ch, err = parse(t, parser, node) if err != nil { return } node.Child = append(node.Child, ch) node.subs = append(node.subs, ch) if node.Full != "" { node.Full += " " } node.Full += ch.Full case xml.EndElement: if t.Name.Local != node.Name { fmt.Printf("Tag " + node.Name + " closed by " + t.Name.Local + "\n") } done = true case xml.CharData: b := bytes.NewBuffer([]byte(t)) s := b.String() ct := " " + cleanText(s) node.Text += ct node.Full += ct node.subs = append(node.subs, &Node{Parent: node, Name: TEXT_NODE, Text: s}) case xml.Comment, xml.Directive, xml.ProcInst: // skip default: fmt.Printf("Very strange:\nType = %t\n Value = %#v\n", tok, tok) } } node.Text = strings.Trim(node.Text, " \n\t\r") node.Full = cleanText(node.Full) prepareClasses(node) trace("Made Node: " + node.String() + "\n") return }