// xmlSeqToMapParser - load a 'clean' XML doc into a map[string]interface{} directly. // Add #seq tag value for each element decoded - to be used for Encoding later. func xmlSeqToMapParser(skey string, a []xml.Attr, p *xml.Decoder, r bool) (map[string]interface{}, error) { // NOTE: all attributes and sub-elements parsed into 'na', 'na' is returned as value for 'skey' in 'n'. var n, na map[string]interface{} var seq int // for including seq num when decoding // Allocate maps and load attributes, if any. // NOTE: on entry from NewMapXml(), etc., skey=="", and we fall through // to get StartElement then recurse with skey==xml.StartElement.Name.Local // where we begin allocating map[string]interface{} values 'n' and 'na'. if skey != "" { // 'n' only needs one slot - save call to runtime•hashGrow() // 'na' we don't know n = make(map[string]interface{}, 1) na = make(map[string]interface{}) if len(a) > 0 { // xml.Attr is decoded into: map["#attr"]map[<attr_label>]interface{} // where interface{} is map[string]interface{}{"#text":<attr_val>, "#seq":<attr_seq>} aa := make(map[string]interface{}, len(a)) for i, v := range a { if len(v.Name.Space) > 0 { aa[v.Name.Space+`:`+v.Name.Local] = map[string]interface{}{"#text": cast(v.Value, r), "#seq": i} } else { aa[v.Name.Local] = map[string]interface{}{"#text": cast(v.Value, r), "#seq": i} } } na["#attr"] = aa } } for { t, err := p.RawToken() if err != nil { if err != io.EOF { return nil, errors.New("xml.Decoder.Token() - " + err.Error()) } return nil, err } switch t.(type) { case xml.StartElement: tt := t.(xml.StartElement) // First call to xmlSeqToMapParser() doesn't pass xml.StartElement - the map key. // So when the loop is first entered, the first token is the root tag along // with any attributes, which we process here. // // Subsequent calls to xmlSeqToMapParser() will pass in tag+attributes for // processing before getting the next token which is the element value, // which is done above. if skey == "" { if len(tt.Name.Space) > 0 { return xmlSeqToMapParser(tt.Name.Space+`:`+tt.Name.Local, tt.Attr, p, r) } else { return xmlSeqToMapParser(tt.Name.Local, tt.Attr, p, r) } } // If not initializing the map, parse the element. // len(nn) == 1, necessarily - it is just an 'n'. var nn map[string]interface{} if len(tt.Name.Space) > 0 { nn, err = xmlSeqToMapParser(tt.Name.Space+`:`+tt.Name.Local, tt.Attr, p, r) } else { nn, err = xmlSeqToMapParser(tt.Name.Local, tt.Attr, p, r) } if err != nil { return nil, err } // The nn map[string]interface{} value is a na[nn_key] value. // We need to see if nn_key already exists - means we're parsing a list. // This may require converting na[nn_key] value into []interface{} type. // First, extract the key:val for the map - it's a singleton. var key string var val interface{} for key, val = range nn { break } // add "#seq" k:v pair - // Sequence number included even in list elements - this should allow us // to properly resequence even something goofy like: // <list>item 1</list> // <subelement>item 2</subelement> // <list>item 3</list> // where all the "list" subelements are decoded into an array. switch val.(type) { case map[string]interface{}: val.(map[string]interface{})["#seq"] = seq seq++ case interface{}: // a non-nil simple element: string, float64, bool v := map[string]interface{}{"#text": val, "#seq": seq} seq++ val = v } // 'na' holding sub-elements of n. // See if 'key' already exists. // If 'key' exists, then this is a list, if not just add key:val to na. if v, ok := na[key]; ok { var a []interface{} switch v.(type) { case []interface{}: a = v.([]interface{}) default: // anything else - note: v.(type) != nil a = []interface{}{v} } a = append(a, val) na[key] = a } else { na[key] = val // save it as a singleton } case xml.EndElement: if skey != "" { tt := t.(xml.EndElement) var name string if len(tt.Name.Space) > 0 { name = tt.Name.Space + `:` + tt.Name.Local } else { name = tt.Name.Local } if skey != name { return nil, fmt.Errorf("element %s not properly terminated, got %s at #%d", skey, name, p.InputOffset()) } } // len(n) > 0 if this is a simple element w/o xml.Attrs - see xml.CharData case. if len(n) == 0 { // If len(na)==0 we have an empty element == ""; // it has no xml.Attr nor xml.CharData. // Empty element content will be map["etag"]map["#text"]"" // after #seq injection - map["etag"]map["#seq"]seq - after return. if len(na) > 0 { n[skey] = na } else { n[skey] = "" // empty element } } return n, nil case xml.CharData: // clean up possible noise tt := strings.Trim(string(t.(xml.CharData)), "\t\r\b\n ") if skey == "" { // per Adrian (http://www.adrianlungu.com/) catch stray text // in decoder stream - // https://github.com/clbanning/mxj/pull/14#issuecomment-182816374 // NOTE: CharSetReader must be set to non-UTF-8 CharSet or you'll get // a p.Token() decoding error when the BOM is UTF-16 or UTF-32. continue } if len(tt) > 0 { // every simple element is a #text and has #seq associated with it na["#text"] = cast(tt, r) na["#seq"] = seq seq++ } case xml.Comment: if n == nil { // no root 'key' n = map[string]interface{}{"#comment": string(t.(xml.Comment))} return n, NoRoot } cm := make(map[string]interface{}, 2) cm["#text"] = string(t.(xml.Comment)) cm["#seq"] = seq seq++ na["#comment"] = cm case xml.Directive: if n == nil { // no root 'key' n = map[string]interface{}{"#directive": string(t.(xml.Directive))} return n, NoRoot } dm := make(map[string]interface{}, 2) dm["#text"] = string(t.(xml.Directive)) dm["#seq"] = seq seq++ na["#directive"] = dm case xml.ProcInst: if n == nil { na = map[string]interface{}{"#target": t.(xml.ProcInst).Target, "#inst": string(t.(xml.ProcInst).Inst)} n = map[string]interface{}{"#procinst": na} return n, NoRoot } pm := make(map[string]interface{}, 3) pm["#target"] = t.(xml.ProcInst).Target pm["#inst"] = string(t.(xml.ProcInst).Inst) pm["#seq"] = seq seq++ na["#procinst"] = pm default: // noop - shouldn't ever get here, now, since we handle all token types } } }