Beispiel #1
0
// unmarshalPath tries to place the element start into one of the
// element fields of sv described by tinfo.
//
// parents is the chain of enclosing element names already matched on
// the way down. Only fields flagged fElement whose own parent chain
// begins with parents are considered:
//
//   - if a field's parent chain equals parents and its name equals the
//     element name, the element is unmarshalled into that field and the
//     result of that call is returned;
//   - if the element name is the next component of a field's parent
//     chain, the search stops and every element child of start is
//     tried recursively with the extended chain.
//
// When neither applies the element is ignored and nil is returned.
// The first error produced by a nested call is returned as is.
func (p *Decoder) unmarshalPath(tinfo *typeInfo, sv reflect.Value, parents []string, start gokoxml.Node) (err error) {
	elemName := start.Name()
	depth := len(parents)
	descend := false

Fields:
	for idx := range tinfo.fields {
		field := &tinfo.fields[idx]
		if field.flags&fElement == 0 {
			continue
		}
		if len(field.parents) < depth {
			// The field's path is shorter than the one already walked.
			continue
		}
		for k, parent := range parents {
			if field.parents[k] != parent {
				continue Fields
			}
		}

		switch {
		case len(field.parents) == depth:
			if field.name == elemName {
				// Exact path and name: this element belongs to the field.
				return p.unmarshal(sv.FieldByIndex(field.idx), start)
			}
		case field.parents[depth] == elemName:
			// The element is one step further along this field's
			// path. Stop searching: a field path must not itself be
			// the prefix of another field path.
			//
			// The field's own slice is re-sliced rather than copied;
			// that is safe as long as nothing appends to it.
			parents = field.parents[:depth+1]
			descend = true
			break Fields
		}
	}

	if !descend {
		// No field is interested in this element.
		return nil
	}

	// Some field lies below this element: offer each element child
	// the extended parent chain.
	child := start.FirstChild()
	for child != nil {
		if child.NodeType() == gokoxml.XML_ELEMENT_NODE {
			if err = p.unmarshalPath(tinfo, sv, parents, child); err != nil {
				return err
			}
		}
		child = child.NextSibling()
	}

	// All children visited.
	return nil
}
Beispiel #2
0
// handleNode applies the formatting rule for node's element name.
//
// The rules are tried in a fixed order and only the first match is
// applied. Inline elements have their content rewritten in place via
// SetContent; "pre", heading, "li" and block elements are handed to
// writeCodeBlock / writeBlock. Elements matching no rule are left
// untouched.
func (self *Formatter) handleNode(node xml.Node) {
	tag := node.Name()

	// Ignored elements lose their content entirely.
	if ignore[tag] {
		node.SetContent("")
		return
	}

	// pre elements are emitted as code blocks.
	if tag == "pre" {
		self.writeCodeBlock(node)
		return
	}

	// Headings are emitted with a "# " prefix.
	if heading[tag] {
		self.writeBlock(node, "# ")
		return
	}

	// List items are emitted with a "- " prefix.
	if tag == "li" {
		self.writeBlock(node, "- ")
		return
	}

	// Explicit line breaks become a newline.
	if tag == "br" {
		node.SetContent("\n")
		return
	}

	// Italic elements are wrapped in slashes.
	if italic[tag] {
		text := node.Content()
		node.SetContent("/" + text + "/")
		return
	}

	// Bold elements are wrapped in asterisks.
	if bold[tag] {
		text := node.Content()
		node.SetContent("*" + text + "*")
		return
	}

	// Images: record the src and replace the element with
	// "(alt)[index]", where index is the position of the src in the
	// collected links. Images lacking either attribute are left alone.
	if tag == "img" {
		alt := node.Attr("alt")
		src := node.Attr("src")
		if len(alt) == 0 || len(src) == 0 {
			return
		}
		index := len(self.links)
		node.SetContent(fmt.Sprintf("(%s)[%d]", alt, index))
		self.links = append(self.links, src)
		return
	}

	// Anchors: record the href and append its link index to the
	// anchor text. Anchors lacking an href or content are left alone.
	if tag == "a" {
		href := node.Attr("href")
		text := node.Content()
		if len(href) == 0 || len(text) == 0 {
			return
		}
		index := len(self.links)
		node.SetContent(fmt.Sprintf("%s[%d]", text, index))
		self.links = append(self.links, href)
		return
	}

	// Remaining block elements are emitted without a prefix.
	if block[tag] {
		self.writeBlock(node, "")
	}
}
Beispiel #3
0
// newCadidate wraps elem in a Candidate and gives it an initial score.
//
// The score starts from the candidate's weight() and is then adjusted
// by the element name, compared case-insensitively:
//
//	div        +5
//	blockquote +3
//	form       -3
//	th         -5
//
// Any other element keeps the plain weight.
func newCadidate(elem xml.Node) *Candidate {
	cand := &Candidate{elem, 0}
	tag := strings.ToLower(elem.Name())

	cand.score = cand.weight()
	switch tag {
	case "div":
		cand.score += 5.0
	case "blockquote":
		cand.score += 3.0
	case "form":
		cand.score -= 3.0
	case "th":
		cand.score -= 5.0
	}
	return cand
}
Beispiel #4
0
// unmarshal decodes the XML node start into val.
//
// If start is nil, decoding begins at the root node of the decoder's
// document. If val is a pointer it is dereferenced first, allocating a
// new pointee when the pointer is nil.
//
// The remaining behaviour depends on the kind of the (dereferenced)
// value:
//
//   - []byte, booleans, numbers and strings receive the node's content
//     through copyValue;
//   - any other slice is grown by one element and the node is decoded
//     into that new element (the slice is shrunk back on error);
//   - a struct of type nameType receives the node's name, a struct of
//     type timeType receives the node's content through copyValue;
//   - any other struct is filled field by field from its typeInfo:
//     attribute, chardata, innerxml and comment fields are handled
//     here, element fields are delegated to unmarshalPath for every
//     element child of start.
//
// Any other kind yields an "unknown type" error. Errors returned by
// getTypeInfo, by unmarshalPath, and by the copyValue calls on the
// scalar / []byte / time paths are returned unchanged.
func (p *Decoder) unmarshal(val reflect.Value, start gokoxml.Node) error {
	// Find first xml node.
	if start == nil {
		start = p.doc.Root().XmlNode
	}

	// Unpack a pointer, allocating the pointee if necessary.
	if pv := val; pv.Kind() == reflect.Ptr {
		if pv.IsNil() {
			pv.Set(reflect.New(pv.Type().Elem()))
		}
		val = pv.Elem()
	}

	var (
		sv    reflect.Value
		tinfo *typeInfo
		err   error
	)

	switch v := val; v.Kind() {
	default:
		return errors.New("unknown type " + v.Type().String())

		// TODO: Implement this once i understand Skip()
		// case reflect.Interface:
		// 	return p.Skip()

	case reflect.Slice:
		typ := v.Type()
		if typ.Elem().Kind() == reflect.Uint8 {
			// []byte
			if err := copyValue(v, start.Content()); err != nil {
				return err
			}
			break
		}

		// Slice of element values.
		// Grow slice: double the capacity, with a minimum of 4.
		n := v.Len()
		if n >= v.Cap() {
			ncap := 2 * n
			if ncap < 4 {
				ncap = 4
			}
			grown := reflect.MakeSlice(typ, n, ncap)
			reflect.Copy(grown, v)
			v.Set(grown)
		}
		v.SetLen(n + 1)

		// Recur to read element into slice; drop the new slot again
		// if decoding it fails.
		if err := p.unmarshal(v.Index(n), start); err != nil {
			v.SetLen(n)
			return err
		}
		return nil

	case reflect.Bool, reflect.Float32, reflect.Float64, reflect.Int, reflect.Int8, reflect.Int16, reflect.Int32, reflect.Int64, reflect.Uint, reflect.Uint8, reflect.Uint16, reflect.Uint32, reflect.Uint64, reflect.Uintptr, reflect.String:
		if err := copyValue(v, start.Content()); err != nil {
			return err
		}

	case reflect.Struct:
		typ := v.Type()
		if typ == nameType {
			v.Set(reflect.ValueOf(xml.Name{Local: start.Name()}))
			break
		}
		if typ == timeType {
			if err := copyValue(v, start.Content()); err != nil {
				return err
			}
			break
		}

		sv = v
		tinfo, err = getTypeInfo(typ)
		if err != nil {
			return err
		}

		// Validate and assign element name.
		if tinfo.xmlname != nil {
			finfo := tinfo.xmlname
			if finfo.name != "" && finfo.name != start.Name() {
				return UnmarshalError("expected element type <" + finfo.name + "> but have <" + start.Name() + ">")
			}

			fv := sv.FieldByIndex(finfo.idx)
			if _, ok := fv.Interface().(xml.Name); ok {
				fv.Set(reflect.ValueOf(xml.Name{Local: start.Name()}))
			}
		}

		// Only the first comment field of the struct receives comments.
		var saveComment reflect.Value
		doSaveComment := false

		// NOTE(review): the copyValue errors in the field loop and for
		// comments below are discarded, as in the original code, while
		// the scalar paths above return them. Confirm whether this
		// best-effort behaviour is intended.
		for i := range tinfo.fields {
			finfo := &tinfo.fields[i]
			switch finfo.flags & fMode {
			case fAttr:
				strv := sv.FieldByIndex(finfo.idx)
				for name, a := range start.Attributes() {
					if name == finfo.name {
						copyValue(strv, a.Content())
					}
				}

			case fCharData:
				strv := sv.FieldByIndex(finfo.idx)
				copyValue(strv, start.Content())

			case fInnerXml:
				// TODO: Not sure why i need to call FirstChild() here.
				// An element without children has no first child;
				// calling String() on it would panic, so the field is
				// left untouched in that case.
				if first := start.FirstChild(); first != nil {
					strv := sv.FieldByIndex(finfo.idx)
					copyValue(strv, first.String())
				}

			case fComment:
				if !doSaveComment {
					doSaveComment = true
					saveComment = sv.FieldByIndex(finfo.idx)
				}
			}
		}

		// Walk the children: comments go to the comment field (if
		// any), elements are matched against the struct's element
		// fields, everything else is skipped.
		for child := start.FirstChild(); child != nil; child = child.NextSibling() {
			if child.NodeType() != gokoxml.XML_ELEMENT_NODE {
				if doSaveComment && child.NodeType() == gokoxml.XML_COMMENT_NODE {
					copyValue(saveComment, child.Content())
				}
				continue
			}

			if err = p.unmarshalPath(tinfo, sv, nil, child); err != nil {
				return err
			}
		}

	} // switch v := val; v.Kind() {

	return nil
}