// unmarshalPath walks down an XML structure looking for wanted // paths, and calls unmarshal on them. // The consumed result tells whether XML elements have been consumed // from the Decoder until start's matching end element, or if it's // still untouched because start is uninteresting for sv's fields. func (p *Decoder) unmarshalPath(tinfo *typeInfo, sv reflect.Value, parents []string, start gokoxml.Node) (err error) { recurse := false name := start.Name() // For speed Loop: for i := range tinfo.fields { finfo := &tinfo.fields[i] if finfo.flags&fElement == 0 || len(finfo.parents) < len(parents) { continue } for j := range parents { if parents[j] != finfo.parents[j] { continue Loop } } if len(finfo.parents) == len(parents) && finfo.name == name { // It's a perfect match, unmarshal the field. return p.unmarshal(sv.FieldByIndex(finfo.idx), start) } if len(finfo.parents) > len(parents) && finfo.parents[len(parents)] == name { // It's a prefix for the field. Break and recurse // since it's not ok for one field path to be itself // the prefix for another field path. recurse = true // We can reuse the same slice as long as we // don't try to append to it. parents = finfo.parents[:len(parents)+1] break } } if !recurse { // We have no business with this element. return nil } // The element is not a perfect match for any field, but one // or more fields have the path to this element as a parent // prefix. Recurse and attempt to match these. for cur_node := start.FirstChild(); cur_node != nil; cur_node = cur_node.NextSibling() { if cur_node.NodeType() != gokoxml.XML_ELEMENT_NODE { continue } if err := p.unmarshalPath(tinfo, sv, parents, cur_node); err != nil { return err } } // No more XML Nodes. return nil }
// Formats the content of inline elements and writes the content of block // elements to the buffer. func (self *Formatter) handleNode(node xml.Node) { name := node.Name() switch { case ignore[name]: // Remove ignored elements. node.SetContent("") case name == "pre": // Treat pre elements as code blocks. self.writeCodeBlock(node) case heading[name]: // Headings are prefixed with "# ". self.writeBlock(node, "# ") case name == "li": // List items are prefixed with "- ". self.writeBlock(node, "- ") case name == "br": // Preserve explicit line breaks. node.SetContent("\n") case italic[name]: // Wrap italic elements with /. node.SetContent("/" + node.Content() + "/") case bold[name]: // Wrap bold elements with *. node.SetContent("*" + node.Content() + "*") case name == "img": // Collect the src of images and replace them with (alt)[url index] alt, src := node.Attr("alt"), node.Attr("src") if len(alt) > 0 && len(src) > 0 { node.SetContent(fmt.Sprintf("(%s)[%d]", alt, len(self.links))) self.links = append(self.links, src) } case name == "a": // Collect the href and and the url index. href, content := node.Attr("href"), node.Content() if len(href) > 0 && len(content) > 0 { node.SetContent(fmt.Sprintf("%s[%d]", content, len(self.links))) self.links = append(self.links, href) } case block[name]: // Write the content of block elements to the buffer. self.writeBlock(node, "") } }
func newCadidate(elem xml.Node) *Candidate { this := &Candidate{elem, 0} switch strings.ToLower(elem.Name()) { case "div": this.score = this.weight() + 5.0 break case "blockquote": this.score = this.weight() + 3.0 break case "form": this.score = this.weight() - 3.0 break case "th": this.score = this.weight() - 5.0 break default: this.score = this.weight() break } return this }
func (p *Decoder) unmarshal(val reflect.Value, start gokoxml.Node) error { // Find first xml node. if start == nil { start = p.doc.Root().XmlNode } // Unpacks a pointer if pv := val; pv.Kind() == reflect.Ptr { if pv.IsNil() { pv.Set(reflect.New(pv.Type().Elem())) } val = pv.Elem() } var ( sv reflect.Value tinfo *typeInfo err error ) switch v := val; v.Kind() { default: return errors.New("unknown type " + v.Type().String()) // TODO: Implement this once i understand Skip() // case reflect.Interface: // return p.Skip() case reflect.Slice: typ := v.Type() if typ.Elem().Kind() == reflect.Uint8 { // []byte if err := copyValue(v, start.Content()); err != nil { return err } break } // Slice of element values. // Grow slice. n := v.Len() if n >= v.Cap() { ncap := 2 * n if ncap < 4 { ncap = 4 } new := reflect.MakeSlice(typ, n, ncap) reflect.Copy(new, v) v.Set(new) } v.SetLen(n + 1) // Recur to read element into slice. if err := p.unmarshal(v.Index(n), start); err != nil { v.SetLen(n) return err } return nil case reflect.Bool, reflect.Float32, reflect.Float64, reflect.Int, reflect.Int8, reflect.Int16, reflect.Int32, reflect.Int64, reflect.Uint, reflect.Uint8, reflect.Uint16, reflect.Uint32, reflect.Uint64, reflect.Uintptr, reflect.String: if err := copyValue(v, start.Content()); err != nil { return err } case reflect.Struct: typ := v.Type() if typ == nameType { v.Set(reflect.ValueOf(xml.Name{Local: start.Name()})) break } if typ == timeType { if err := copyValue(v, start.Content()); err != nil { return err } break } sv = v tinfo, err = getTypeInfo(typ) if err != nil { return err } // Validate and assign element name. if tinfo.xmlname != nil { // var space string finfo := tinfo.xmlname if finfo.name != "" && finfo.name != start.Name() { return UnmarshalError("expected element type <" + finfo.name + "> but have <" + start.Name() + ">") } fv := sv.FieldByIndex(finfo.idx) if _, ok := fv.Interface().(xml.Name); ok { fv.Set(reflect.ValueOf(xml.Name{Local: start.Name()})) } } var saveComment reflect.Value var doSaveComment = false _ = saveComment for i := range tinfo.fields { finfo := &tinfo.fields[i] switch finfo.flags & fMode { case fAttr: strv := sv.FieldByIndex(finfo.idx) for name, a := range start.Attributes() { if name == finfo.name { copyValue(strv, a.Content()) } } case fCharData: strv := sv.FieldByIndex(finfo.idx) copyValue(strv, start.Content()) case fInnerXml: strv := sv.FieldByIndex(finfo.idx) // TODO: Not sure why i need to call FirstChild() here. copyValue(strv, start.FirstChild().String()) case fComment: if !doSaveComment { doSaveComment = true saveComment = sv.FieldByIndex(finfo.idx) } } } for cur_node := start.FirstChild(); cur_node != nil; cur_node = cur_node.NextSibling() { if sv.IsValid() { if cur_node.NodeType() != gokoxml.XML_ELEMENT_NODE { if doSaveComment && cur_node.NodeType() == gokoxml.XML_COMMENT_NODE { copyValue(saveComment, cur_node.Content()) } continue } err = p.unmarshalPath(tinfo, sv, nil, cur_node) if err != nil { return err } } } } // switch v := val; v.Kind() { return nil }