Example #1
0
// findElement reads raw tokens from d, discarding everything until it reaches
// a start element whose name equals one of names, and returns that element.
//
// Names are compared as whole xml.Name values, so both Space and Local must
// match. Tokens come from RawToken, which does not translate namespace
// prefixes, so Space is the prefix as written in the document.
//
// Any error from the decoder (including io.EOF when no matching element is
// left in the input) is returned unchanged together with a zero StartElement.
func findElement(d *xml.Decoder, names []xml.Name) (xml.StartElement, error) {
	for {
		t, err := d.RawToken()
		if err != nil {
			return xml.StartElement{}, err
		}
		start, ok := t.(xml.StartElement)
		if !ok {
			// Not a start tag: character data, comments, end tags, etc. are skipped.
			continue
		}
		for _, v := range names {
			if v == start.Name {
				return start, nil
			}
		}
	}
}
Example #2
0
// parseList reads raw tokens from d and returns the nodes found at the
// current nesting level: one Node per start element whose name appears in
// names (with that element's own matching descendants in Node.List), plus one
// Node per non-blank run of character data, trimmed of surrounding space.
//
// stack holds the names of the elements that are currently open; parsing
// stops once the stack has been emptied by the matching end tags. Elements
// whose names are not listed in names are not turned into nodes: they are
// only pushed on the stack, so their contents are collected as if they
// belonged to the enclosing level.
//
// An error is returned when the token stream ends (or fails) before the stack
// is empty, or when popName rejects an end tag.
func parseList(d *xml.Decoder, names, stack []xml.Name) ([]Node, error) {
	var c []Node
	for len(stack) > 0 {
		t, err := d.RawToken()
		if err != nil {
			// NOTE(review): every decoder error, not only io.EOF, is reported
			// as unclosed tags; the underlying cause is not included.
			return nil, fmt.Errorf("unclosed tags: %v", stack)
		}
		// A token can be of the following types:
		//
		//   xml.CharData
		//   xml.Comment
		//   xml.Directive
		//   xml.EndElement
		//   xml.ProcInst
		//   xml.StartElement
		switch t := t.(type) {
		case xml.StartElement:
			found := false
			for _, v := range names {
				if v == t.Name {
					found = true
					// Stop at the first match: recursing once per matching
					// entry would parse the element again for a duplicated
					// name and swallow the tokens that follow it.
					break
				}
			}
			if !found {
				stack = append(stack, t.Name)
				continue
			}
			// The recursive call consumes everything up to and including the
			// end tag of this element.
			list, err := parseList(d, names, []xml.Name{t.Name})
			if err != nil {
				return nil, err
			}
			c = append(c, Node{Token: t, List: list})
		case xml.EndElement:
			if stack, err = popName(stack, t.Name); err != nil {
				return nil, err
			}
		case xml.CharData:
			if b := bytes.TrimSpace(t); len(b) > 0 {
				// The decoder may reuse the token's bytes on the next read,
				// so keep a private copy.
				b1 := make(xml.CharData, len(b))
				copy(b1, b)
				c = append(c, Node{Token: b1})
			}
		}
	}
	return c, nil
}
Example #3
0
// xmlToTreeParser - load a 'clean' XML doc into a tree of *node.
//
// skey and a are the tag name and attributes of an element whose start tag
// the caller has already consumed; pass skey == "" to let the first start tag
// read from p become the root of the returned tree. Each attribute becomes a
// child node flagged attr (keyed by its local name only), each sub-element is
// parsed recursively into a child node, and character data is stored either
// in the node's val or, once the node already has children, in a child keyed
// "_". When includeTagSeqNum is set, "_seq" nodes carrying a running counter
// are added as well.
//
// The function returns when the end tag closing the current element is read.
// io.EOF is returned as-is if the input ends first; any other decoder error is
// wrapped (so errors.Is / errors.As still see the cause) behind the
// "xml.Decoder.Token() - " prefix.
func xmlToTreeParser(skey string, a []xml.Attr, p *xml.Decoder) (*node, error) {
	n := new(node)
	n.nodes = make([]*node, 0)
	var seq int // for includeTagSeqNum

	// addAttrs appends one attribute node per xml.Attr to n.
	addAttrs := func(attrs []xml.Attr) {
		for _, v := range attrs {
			n.nodes = append(n.nodes, &node{attr: true, key: v.Name.Local, val: v.Value})
		}
	}

	if skey != "" {
		n.key = skey
		addAttrs(a)
	}
	for {
		t, err := p.RawToken()
		if err != nil {
			if errors.Is(err, io.EOF) {
				return nil, err
			}
			return nil, fmt.Errorf("xml.Decoder.Token() - %w", err)
		}
		switch tt := t.(type) {
		case xml.StartElement:
			// RawToken leaves the prefix untranslated, so the key is "prefix:local".
			key := tt.Name.Local
			if tt.Name.Space != "" {
				key = tt.Name.Space + ":" + tt.Name.Local
			}

			// handle root
			if n.key == "" {
				n.key = key
				addAttrs(tt.Attr)
				continue
			}

			// Nested element: the recursive call consumes through its end tag.
			nn, nnerr := xmlToTreeParser(key, tt.Attr, p)
			if nnerr != nil {
				return nil, nnerr
			}
			n.nodes = append(n.nodes, nn)
			if includeTagSeqNum { // 2014.11.09
				sn := &node{false, false, "_seq", strconv.Itoa(seq), nil}
				nn.nodes = append(nn.nodes, sn)
				seq++
			}
		case xml.EndElement:
			// scan n.nodes for duplicate n.key values
			n.markDuplicateKeys()
			return n, nil
		case xml.CharData:
			// clean up possible noise
			text := strings.Trim(string(tt), "\t\r\b\n ")
			if len(n.nodes) > 0 && len(text) > 0 {
				n.nodes = append(n.nodes, &node{key: "_", val: text})
			} else {
				// NOTE(review): this branch also runs for blank text after
				// children exist, resetting n.val to "".
				n.val = text
			}
			if includeTagSeqNum { // 2014.11.09
				if len(n.nodes) == 0 { // treat like a simple element with attributes
					n.nodes = append(n.nodes, &node{key: "_", val: text})
				}
				sn := &node{false, false, "_seq", strconv.Itoa(seq), nil}
				n.nodes = append(n.nodes, sn)
				seq++
			}
		default:
			// noop
		}
	}
}
Example #4
0
// xmlSeqToMapParser - load a 'clean' XML doc into a map[string]interface{} directly.
// Add #seq tag value for each element decoded - to be used for Encoding later.
//
// skey and a are the (prefix-qualified) tag and attributes of an element whose
// start tag has already been read; with skey == "" the first start tag found
// in p is used as the root. r is handed through to cast for every attribute
// and text value. The result is a single-entry map from skey to either ""
// (empty element) or a map holding "#attr", "#text", "#comment",
// "#directive", "#procinst" and sub-element entries; repeated sub-element
// keys are gathered into a []interface{}.
//
// Errors:
//   - io.EOF is returned as-is when the input ends before the element closes;
//   - other decoder errors are wrapped behind "xml.Decoder.Token() - " so that
//     errors.Is / errors.As still reach the cause;
//   - a mismatched end tag, or an end tag seen before any root start tag,
//     yields a descriptive error;
//   - NoRoot accompanies the map returned for a comment, directive or
//     processing instruction met before any root element.
func xmlSeqToMapParser(skey string, a []xml.Attr, p *xml.Decoder, r bool) (map[string]interface{}, error) {
	// NOTE: all attributes and sub-elements parsed into 'na', 'na' is returned as value for 'skey' in 'n'.
	var n, na map[string]interface{}
	var seq int // for including seq num when decoding

	// qname renders a name as "prefix:local", or just "local" without a prefix.
	qname := func(nm xml.Name) string {
		if len(nm.Space) > 0 {
			return nm.Space + `:` + nm.Local
		}
		return nm.Local
	}

	// Allocate maps and load attributes, if any.
	// NOTE: on entry with skey=="" both maps stay nil; we fall through to get
	//       the StartElement and recurse with its name as skey, which is
	//       where 'n' and 'na' get allocated.
	if skey != "" {
		// 'n' only needs one slot; 'na' we don't know.
		n = make(map[string]interface{}, 1)
		na = make(map[string]interface{})
		if len(a) > 0 {
			// xml.Attr is decoded into: map["#attr"]map[<attr_label>]interface{}
			// where interface{} is map[string]interface{}{"#text":<attr_val>, "#seq":<attr_seq>}
			aa := make(map[string]interface{}, len(a))
			for i, v := range a {
				aa[qname(v.Name)] = map[string]interface{}{"#text": cast(v.Value, r), "#seq": i}
			}
			na["#attr"] = aa
		}
	}
	for {
		t, err := p.RawToken()
		if err != nil {
			if errors.Is(err, io.EOF) {
				return nil, err
			}
			return nil, fmt.Errorf("xml.Decoder.Token() - %w", err)
		}
		switch tt := t.(type) {
		case xml.StartElement:
			// The first call doesn't receive a tag, so the first start tag
			// seen is the root: restart with its name and attributes.
			if skey == "" {
				return xmlSeqToMapParser(qname(tt.Name), tt.Attr, p, r)
			}

			// Otherwise parse the sub-element.
			// len(nn) == 1, necessarily - it is just an 'n'.
			nn, nnerr := xmlSeqToMapParser(qname(tt.Name), tt.Attr, p, r)
			if nnerr != nil {
				return nil, nnerr
			}

			// Extract the singleton key:val pair from nn.
			var key string
			var val interface{}
			for key, val = range nn {
				break
			}

			// add "#seq" k:v pair -
			// Sequence number included even in list elements - this should allow us
			// to properly resequence even something goofy like:
			//     <list>item 1</list>
			//     <subelement>item 2</subelement>
			//     <list>item 3</list>
			// where all the "list" subelements are decoded into an array.
			switch v := val.(type) {
			case map[string]interface{}:
				v["#seq"] = seq
				seq++
			case interface{}: // a non-nil simple value, e.g. "" for an empty element
				val = map[string]interface{}{"#text": v, "#seq": seq}
				seq++
			}

			// 'na' holds the sub-elements of n. A key that is already present
			// means we're parsing a list: grow the existing slice, or promote
			// the earlier singleton to a slice first.
			if prev, ok := na[key]; ok {
				list, isList := prev.([]interface{})
				if !isList {
					list = []interface{}{prev}
				}
				na[key] = append(list, val)
			} else {
				na[key] = val // save it as a singleton
			}
		case xml.EndElement:
			name := qname(tt.Name)
			if skey == "" {
				// An end tag before any root start tag: 'n' and 'na' are still
				// nil, so there is nothing to close and nowhere to store a value.
				return nil, fmt.Errorf("unexpected end element %s at #%d", name, p.InputOffset())
			}
			if skey != name {
				return nil, fmt.Errorf("element %s not properly terminated, got %s at #%d",
					skey, name, p.InputOffset())
			}
			if len(n) == 0 {
				// If len(na)==0 we have an empty element == "";
				// it has no xml.Attr nor xml.CharData.
				// Empty element content will be  map["etag"]map["#text"]""
				// after #seq injection - map["etag"]map["#seq"]seq - after return.
				if len(na) > 0 {
					n[skey] = na
				} else {
					n[skey] = "" // empty element
				}
			}
			return n, nil
		case xml.CharData:
			// clean up possible noise
			text := strings.Trim(string(tt), "\t\r\b\n ")
			if skey == "" {
				// per Adrian (http://www.adrianlungu.com/) catch stray text
				// in decoder stream -
				// https://github.com/clbanning/mxj/pull/14#issuecomment-182816374
				// NOTE: CharSetReader must be set to non-UTF-8 CharSet or you'll get
				// a p.Token() decoding error when the BOM is UTF-16 or UTF-32.
				continue
			}
			if len(text) > 0 {
				// every simple element is a #text and has #seq associated with it
				na["#text"] = cast(text, r)
				na["#seq"] = seq
				seq++
			}
		case xml.Comment:
			if n == nil { // no root 'key'
				return map[string]interface{}{"#comment": string(tt)}, NoRoot
			}
			na["#comment"] = map[string]interface{}{"#text": string(tt), "#seq": seq}
			seq++
		case xml.Directive:
			if n == nil { // no root 'key'
				return map[string]interface{}{"#directive": string(tt)}, NoRoot
			}
			na["#directive"] = map[string]interface{}{"#text": string(tt), "#seq": seq}
			seq++
		case xml.ProcInst:
			if n == nil { // no root 'key'
				pi := map[string]interface{}{"#target": tt.Target, "#inst": string(tt.Inst)}
				return map[string]interface{}{"#procinst": pi}, NoRoot
			}
			na["#procinst"] = map[string]interface{}{
				"#target": tt.Target,
				"#inst":   string(tt.Inst),
				"#seq":    seq,
			}
			seq++
		default:
			// noop - shouldn't ever get here, now, since we handle all token types
		}
	}
}