Example #1
0
// xmlSeqToMapParser - load a 'clean' XML doc into a map[string]interface{} directly.
// Add #seq tag value for each element decoded - to be used for Encoding later.
func xmlSeqToMapParser(skey string, a []xml.Attr, p *xml.Decoder, r bool) (map[string]interface{}, error) {
	// NOTE: all attributes and sub-elements parsed into 'na', 'na' is returned as value for 'skey' in 'n'.
	var n, na map[string]interface{}
	var seq int // for including seq num when decoding

	// Allocate maps and load attributes, if any.
	// NOTE: on entry from NewMapXml(), etc., skey=="", and we fall through
	//       to get StartElement then recurse with skey==xml.StartElement.Name.Local
	//       where we begin allocating map[string]interface{} values 'n' and 'na'.
	if skey != "" {
		// 'n' only needs one slot - save call to runtime•hashGrow()
		// 'na' we don't know
		n = make(map[string]interface{}, 1)
		na = make(map[string]interface{})
		if len(a) > 0 {
			// xml.Attr is decoded into: map["#attr"]map[<attr_label>]interface{}
			// where interface{} is map[string]interface{}{"#text":<attr_val>, "#seq":<attr_seq>}
			aa := make(map[string]interface{}, len(a))
			for i, v := range a {
				if len(v.Name.Space) > 0 {
					aa[v.Name.Space+`:`+v.Name.Local] = map[string]interface{}{"#text": cast(v.Value, r), "#seq": i}
				} else {
					aa[v.Name.Local] = map[string]interface{}{"#text": cast(v.Value, r), "#seq": i}
				}
			}
			na["#attr"] = aa
		}
	}
	for {
		t, err := p.RawToken()
		if err != nil {
			if err != io.EOF {
				return nil, errors.New("xml.Decoder.Token() - " + err.Error())
			}
			return nil, err
		}
		switch t.(type) {
		case xml.StartElement:
			tt := t.(xml.StartElement)

			// First call to xmlSeqToMapParser() doesn't pass xml.StartElement - the map key.
			// So when the loop is first entered, the first token is the root tag along
			// with any attributes, which we process here.
			//
			// Subsequent calls to xmlSeqToMapParser() will pass in tag+attributes for
			// processing before getting the next token which is the element value,
			// which is done above.
			if skey == "" {
				if len(tt.Name.Space) > 0 {
					return xmlSeqToMapParser(tt.Name.Space+`:`+tt.Name.Local, tt.Attr, p, r)
				} else {
					return xmlSeqToMapParser(tt.Name.Local, tt.Attr, p, r)
				}
			}

			// If not initializing the map, parse the element.
			// len(nn) == 1, necessarily - it is just an 'n'.
			var nn map[string]interface{}
			if len(tt.Name.Space) > 0 {
				nn, err = xmlSeqToMapParser(tt.Name.Space+`:`+tt.Name.Local, tt.Attr, p, r)
			} else {
				nn, err = xmlSeqToMapParser(tt.Name.Local, tt.Attr, p, r)
			}
			if err != nil {
				return nil, err
			}

			// The nn map[string]interface{} value is a na[nn_key] value.
			// We need to see if nn_key already exists - means we're parsing a list.
			// This may require converting na[nn_key] value into []interface{} type.
			// First, extract the key:val for the map - it's a singleton.
			var key string
			var val interface{}
			for key, val = range nn {
				break
			}

			// add "#seq" k:v pair -
			// Sequence number included even in list elements - this should allow us
			// to properly resequence even something goofy like:
			//     <list>item 1</list>
			//     <subelement>item 2</subelement>
			//     <list>item 3</list>
			// where all the "list" subelements are decoded into an array.
			switch val.(type) {
			case map[string]interface{}:
				val.(map[string]interface{})["#seq"] = seq
				seq++
			case interface{}: // a non-nil simple element: string, float64, bool
				v := map[string]interface{}{"#text": val, "#seq": seq}
				seq++
				val = v
			}

			// 'na' holding sub-elements of n.
			// See if 'key' already exists.
			// If 'key' exists, then this is a list, if not just add key:val to na.
			if v, ok := na[key]; ok {
				var a []interface{}
				switch v.(type) {
				case []interface{}:
					a = v.([]interface{})
				default: // anything else - note: v.(type) != nil
					a = []interface{}{v}
				}
				a = append(a, val)
				na[key] = a
			} else {
				na[key] = val // save it as a singleton
			}
		case xml.EndElement:
			if skey != "" {
				tt := t.(xml.EndElement)
				var name string
				if len(tt.Name.Space) > 0 {
					name = tt.Name.Space + `:` + tt.Name.Local
				} else {
					name = tt.Name.Local
				}
				if skey != name {
					return nil, fmt.Errorf("element %s not properly terminated, got %s at #%d",
						skey, name, p.InputOffset())
				}
			}
			// len(n) > 0 if this is a simple element w/o xml.Attrs - see xml.CharData case.
			if len(n) == 0 {
				// If len(na)==0 we have an empty element == "";
				// it has no xml.Attr nor xml.CharData.
				// Empty element content will be  map["etag"]map["#text"]""
				// after #seq injection - map["etag"]map["#seq"]seq - after return.
				if len(na) > 0 {
					n[skey] = na
				} else {
					n[skey] = "" // empty element
				}
			}
			return n, nil
		case xml.CharData:
			// clean up possible noise
			tt := strings.Trim(string(t.(xml.CharData)), "\t\r\b\n ")
			if skey == "" {
				// per Adrian (http://www.adrianlungu.com/) catch stray text
				// in decoder stream -
				// https://github.com/clbanning/mxj/pull/14#issuecomment-182816374
				// NOTE: CharSetReader must be set to non-UTF-8 CharSet or you'll get
				// a p.Token() decoding error when the BOM is UTF-16 or UTF-32.
				continue
			}
			if len(tt) > 0 {
				// every simple element is a #text and has #seq associated with it
				na["#text"] = cast(tt, r)
				na["#seq"] = seq
				seq++
			}
		case xml.Comment:
			if n == nil { // no root 'key'
				n = map[string]interface{}{"#comment": string(t.(xml.Comment))}
				return n, NoRoot
			}
			cm := make(map[string]interface{}, 2)
			cm["#text"] = string(t.(xml.Comment))
			cm["#seq"] = seq
			seq++
			na["#comment"] = cm
		case xml.Directive:
			if n == nil { // no root 'key'
				n = map[string]interface{}{"#directive": string(t.(xml.Directive))}
				return n, NoRoot
			}
			dm := make(map[string]interface{}, 2)
			dm["#text"] = string(t.(xml.Directive))
			dm["#seq"] = seq
			seq++
			na["#directive"] = dm
		case xml.ProcInst:
			if n == nil {
				na = map[string]interface{}{"#target": t.(xml.ProcInst).Target, "#inst": string(t.(xml.ProcInst).Inst)}
				n = map[string]interface{}{"#procinst": na}
				return n, NoRoot
			}
			pm := make(map[string]interface{}, 3)
			pm["#target"] = t.(xml.ProcInst).Target
			pm["#inst"] = string(t.(xml.ProcInst).Inst)
			pm["#seq"] = seq
			seq++
			na["#procinst"] = pm
		default:
			// noop - shouldn't ever get here, now, since we handle all token types
		}
	}
}