//This function creates variations on tokens without regards as to positions in the file. func getTokenVariations(t xml.Token) []xml.Token { var result []xml.Token = make([]xml.Token, 0) switch t := t.(type) { case xml.CharData: { //If the token is a number try some random number if _, err := strconv.Atoi(string(t)); err == nil { result = append(result, xml.CharData(randInt(rand.Intn(15)))) } result = append(result, xml.CharData(randString(rand.Intn(100)))) return result } case xml.StartElement: { for k := range t.Attr { if _, err := strconv.Atoi(string(t.Attr[k].Value)); err == nil { start := xml.CopyToken(t).(xml.StartElement) start.Attr[k].Value = string(randInt(rand.Intn(15))) result = append(result, start) } start := xml.CopyToken(t).(xml.StartElement) start.Attr[k].Value = string(randString(rand.Intn(100))) result = append(result, start) } return result } default: { return make([]xml.Token, 0) // No variations on non char tokens yet } } }
//ParseXML creates an XMLTree structure from an io.Reader func ParseXML(r io.Reader) (pathres.PathRes, error) { dec := xml.NewDecoder(r) tree := &element.PathResElement{Value: xml.StartElement{}, Children: []pathres.PathRes{}, Parent: nil} tree.Parent = tree pos := tree done := false for !done { t, err := dec.Token() if err != nil { return nil, err } if t == nil { break } switch t.(type) { case xml.StartElement: ele := t.(xml.StartElement) attrs := make([]pathres.PathRes, len(ele.Attr)) for i := range attrs { attrs[i] = &attribute.PathResAttribute{Value: &ele.Attr[i], Parent: pos} } ch := &element.PathResElement{Value: xml.CopyToken(ele), Attrs: attrs, Children: []pathres.PathRes{}, Parent: pos} pos.Children = append(pos.Children, ch) pos = ch case xml.CharData: ch := &chardata.PathResCharData{Value: xml.CopyToken(t), Parent: pos} pos.Children = append(pos.Children, ch) case xml.Comment: ch := &comment.PathResComment{Value: xml.CopyToken(t), Parent: pos} pos.Children = append(pos.Children, ch) case xml.ProcInst: if pos.Parent != pos { ch := &procinst.PathResProcInst{Value: xml.CopyToken(t), Parent: pos} pos.Children = append(pos.Children, ch) } case xml.EndElement: if pos.Parent == pos { return nil, fmt.Errorf("Malformed XML found.") } pos = pos.Parent.(*element.PathResElement) if pos.Parent == pos { done = true } } } return tree, nil }
func (d *Decode) parseXML(s string, pre *node) bool { tokenArray := make([]xmlToken, 0, 64) decoder := xml.NewDecoder(strings.NewReader(s)) for t, err := decoder.RawToken(); err == nil; t, err = decoder.RawToken() { tokenArray = append(tokenArray, xmlToken{token: xml.CopyToken(t)}) } var isXML bool for i := 0; i < len(tokenArray); i++ { if token, ok := tokenArray[i].token.(xml.CharData); ok { if i > 0 && i+1 < len(tokenArray) { if start, ok := tokenArray[i-1].token.(xml.StartElement); ok { if _, ok := tokenArray[i+1].token.(xml.EndElement); ok { isXML = true tokenArray[i].isData = true d.decode(string(token), &node{XMLOBJ, pre, start.Name.Local, &xmlObj{tokenArray, i}}) } } } } } return isXML }
func (d *Decoder) decodeAny(start xml.StartElement) (xml.Token, error) { t, err := d.xd.Token() if err != nil { return nil, fmt.Errorf("plist: error reading token: %s", err) } end, ok := t.(xml.EndElement) if ok { if end.Name.Local != start.Name.Local { return nil, fmt.Errorf("plist: unexpected end tag: %s", end.Name.Local) } // empty return nil, nil } tok := xml.CopyToken(t) next, err := d.nextElement() if err != nil { return nil, fmt.Errorf("plist: error reading token: %s", err) } end, ok = next.(xml.EndElement) if !ok || end.Name.Local != start.Name.Local { // empty return nil, fmt.Errorf("plist: unexpected end tag: %s", end.Name.Local) } return tok, nil }
func (d *Decoder) decodeAny(start xml.StartElement) (tok xml.Token, err error) { if tok, err = d.xd.Token(); err != nil { return } if end, ok := tok.(xml.EndElement); ok { if end.Name.Local != start.Name.Local { err = fmt.Errorf("plist: unexpected end tag: %s", end.Name.Local) return } // empty return } tok = xml.CopyToken(tok) var next xml.Token if next, err = d.nextElement(); err != nil { return } if end, ok := next.(xml.EndElement); !ok || end.Name.Local != start.Name.Local { // empty err = fmt.Errorf("plist: unexpected end tag: %s", end.Name.Local) return } return }
// normalize writes the normalized XML content of r to w. It applies the // following rules // // * Rename namespace prefixes according to an internal heuristic. // * Remove unnecessary namespace declarations. // * Sort attributes in XML start elements in lexical order of their // fully qualified name. // * Remove XML directives and processing instructions. // * Remove CDATA between XML tags that only contains whitespace, if // instructed to do so. // * Remove comments, if instructed to do so. // func (n *xmlNormalizer) normalize(w io.Writer, r io.Reader) error { d := xml.NewDecoder(r) e := xml.NewEncoder(w) for { t, err := d.Token() if err != nil { if t == nil && err == io.EOF { break } return err } switch val := t.(type) { case xml.Directive, xml.ProcInst: continue case xml.Comment: if n.omitComments { continue } case xml.CharData: if n.omitWhitespace && len(bytes.TrimSpace(val)) == 0 { continue } case xml.StartElement: start, _ := xml.CopyToken(val).(xml.StartElement) attr := start.Attr[:0] for _, a := range start.Attr { if a.Name.Space == "xmlns" || a.Name.Local == "xmlns" { continue } attr = append(attr, a) } sort.Sort(byName(attr)) start.Attr = attr t = start } err = e.EncodeToken(t) if err != nil { return err } } return e.Flush() }
func (d *Decoder) consumeExtensions(se xml.StartElement) (tokens []xml.Token, err error) { lvl := 0 for { tok, err := d.ts.Token() if err != nil { return tokens, err } switch tok.(type) { case xml.StartElement: lvl++ case xml.EndElement: if lvl == 0 { return tokens, nil } lvl-- } tokens = append(tokens, xml.CopyToken(tok)) } }
func xmlTokens(c *gc.C, data []byte) []xml.Token { dec := xml.NewDecoder(bytes.NewReader(data)) var toks []xml.Token for { tok, err := dec.Token() if err == io.EOF { return toks } c.Assert(err, gc.IsNil) if cdata, ok := tok.(xml.CharData); ok { // It's char data - trim all white space and ignore it // if it's all blank. cdata = bytes.TrimSpace(cdata) if len(cdata) == 0 { continue } tok = cdata } toks = append(toks, xml.CopyToken(tok)) } }
func (p *XMLPullParser) NextToken() (event XMLEventType, err error) { // Clear any state held for the previous token p.resetTokenState() token, err := p.decoder.Token() if err != nil { if err == io.EOF { // XML decoder returns the EOF as an error // but we want to return it as a valid // EndDocument token instead p.token = nil p.Event = EndDocument return p.Event, nil } return event, err } p.token = xml.CopyToken(token) p.processToken(p.token) p.Event = p.EventType(p.token) return p.Event, nil }
func variationsXML(f *zip.File) chan tokenchange { result := make(chan tokenchange) r, _ := f.Open() xmlReader := xml.NewDecoder(r) var tokenList []xml.Token for { if t, err := xmlReader.Token(); err == nil { tokenList = append(tokenList, xml.CopyToken(t)) } else { break } } go func() { //Over every token we want to break for TokenToBreak, _ := range tokenList { //Get the ways we can break that token for _, brokenToken := range getTokenVariations(tokenList[TokenToBreak]) { var buf bytes.Buffer xmlWriter := xml.NewEncoder(&buf) //Now create an xml file where one token is broken for currentToken, t := range tokenList { if currentToken == TokenToBreak { xmlWriter.EncodeToken(brokenToken) } else { xmlWriter.EncodeToken(t) } } xmlWriter.Flush() result <- tokenchange{buf, tokenList[TokenToBreak], brokenToken} } } close(result) }() return result }
// WIP approximation of first steps to be taken to encode manifest func TestEncode(t *testing.T) { f, err := os.Open("testdata/bootstrap.xml") if err != nil { t.Fatal(err) } var attrs []xml.Attr dec := xml.NewDecoder(f) for { tkn, err := dec.Token() if err != nil { if err == io.EOF { break } return // t.Fatal(err) } tkn = xml.CopyToken(tkn) switch tkn := tkn.(type) { case xml.StartElement: attrs = append(attrs, tkn.Attr...) default: // t.Error("unhandled token type", tkn) } } bvc := xml.Attr{ Name: xml.Name{ Space: "", Local: "platformBuildVersionCode", }, Value: "15", } bvn := xml.Attr{ Name: xml.Name{ Space: "", Local: "platformBuildVersionName", }, Value: "4.0.3", } attrs = append(attrs, bvc, bvn) sort.Sort(byNamespace(attrs)) var names, vals []string for _, attr := range attrs { if strings.HasSuffix(attr.Name.Space, "tools") { continue } names = append(names, attr.Name.Local) vals = append(vals, attr.Value) } var all []string all = append(all, names...) all = append(all, vals...) // do not eliminate duplicates until the entire slice has been composed. // consider <activity android:label="label" .../> // all attribute names come first followed by values; in such a case, the value "label" // would be a reference to the same "android:label" in the string pool which will occur // within the beginning of the pool where other attr names are located. pl := new(Pool) for _, x := range retset(all) { pl.strings = append(pl.strings, x) // t.Logf("Pool(%v) %q\n", i, x) } }
func UnmarshalXML(r io.Reader) (*XML, error) { tbl, err := OpenTable() if err != nil { return nil, err } lr := &lineReader{r: r} dec := xml.NewDecoder(lr) bx := new(XML) // temporary pool to resolve real poolref later pool := new(Pool) for { line := lr.line(dec.InputOffset()) tkn, err := dec.Token() if err != nil { if err == io.EOF { break } return nil, err } tkn = xml.CopyToken(tkn) switch tkn := tkn.(type) { case xml.StartElement: el := &Element{ NodeHeader: NodeHeader{ LineNumber: uint32(line), Comment: 0xFFFFFFFF, }, NS: NoEntry, Name: pool.ref(tkn.Name.Local), } if len(bx.stack) == 0 { bx.Children = append(bx.Children, el) } else { n := len(bx.stack) var p *Element p, bx.stack = bx.stack[n-1], bx.stack[:n-1] p.Children = append(p.Children, el) bx.stack = append(bx.stack, p) } bx.stack = append(bx.stack, el) if tkn.Name.Local == "manifest" { tkn.Attr = append(tkn.Attr, xml.Attr{ Name: xml.Name{ Space: "", Local: "platformBuildVersionCode", }, Value: "15", }, xml.Attr{ Name: xml.Name{ Space: "", Local: "platformBuildVersionName", }, Value: "4.0.4-1406430", }) } for _, attr := range tkn.Attr { if (attr.Name.Space == "xmlns" && attr.Name.Local == "tools") || attr.Name.Space == toolsSchema { continue // TODO can tbl be queried for schemas to determine validity instead? } if attr.Name.Space == "xmlns" && attr.Name.Local == "android" { if bx.Namespace != nil { return nil, fmt.Errorf("multiple declarations of xmlns:android encountered") } bx.Namespace = &Namespace{ NodeHeader: NodeHeader{ LineNumber: uint32(line), Comment: NoEntry, }, prefix: 0, uri: 0, } continue } nattr := &Attribute{ NS: pool.ref(attr.Name.Space), Name: pool.ref(attr.Name.Local), RawValue: NoEntry, } el.attrs = append(el.attrs, nattr) if attr.Name.Space == "" { nattr.NS = NoEntry // TODO it's unclear how to query these switch attr.Name.Local { case "platformBuildVersionCode": nattr.TypedValue.Type = DataIntDec i, err := strconv.Atoi(attr.Value) if err != nil { return nil, err } nattr.TypedValue.Value = uint32(i) default: // "package", "platformBuildVersionName", and any invalid nattr.RawValue = pool.ref(attr.Value) nattr.TypedValue.Type = DataString } } else { // get type spec and value data type ref, err := tbl.RefByName("attr/" + attr.Name.Local) if err != nil { return nil, err } nt, err := ref.Resolve(tbl) if err != nil { return nil, err } if len(nt.values) == 0 { panic("encountered empty values slice") } if len(nt.values) == 1 { val := nt.values[0] if val.data.Type != DataIntDec { panic("TODO only know how to handle DataIntDec type here") } t := DataType(val.data.Value) switch t { case DataString, DataAttribute, DataType(0x3e): // TODO identify 0x3e, in bootstrap.xml this is the native lib name nattr.RawValue = pool.ref(attr.Value) nattr.TypedValue.Type = DataString nattr.TypedValue.Value = uint32(nattr.RawValue) case DataIntBool, DataType(0x08): nattr.TypedValue.Type = DataIntBool switch attr.Value { case "true": nattr.TypedValue.Value = 0xFFFFFFFF case "false": nattr.TypedValue.Value = 0 default: return nil, fmt.Errorf("invalid bool value %q", attr.Value) } case DataIntDec, DataFloat, DataFraction: // TODO DataFraction needs it's own case statement. minSdkVersion identifies as DataFraction // but has accepted input in the past such as android:minSdkVersion="L" // Other use-cases for DataFraction are currently unknown as applicable to manifest generation // but this provides minimum support for writing out minSdkVersion="15" correctly. nattr.TypedValue.Type = DataIntDec i, err := strconv.Atoi(attr.Value) if err != nil { return nil, err } nattr.TypedValue.Value = uint32(i) case DataReference: nattr.TypedValue.Type = DataReference dref, err := tbl.RefByName(attr.Value) if err != nil { return nil, err } nattr.TypedValue.Value = uint32(dref) default: return nil, fmt.Errorf("unhandled data type %0#2x: %s", uint8(t), t) } } else { // 0x01000000 is an unknown ref that doesn't point to anything, typically // located at the start of entry value lists, peek at last value to determine type. t := nt.values[len(nt.values)-1].data.Type switch t { case DataIntDec: for _, val := range nt.values { if val.name == 0x01000000 { continue } nr, err := val.name.Resolve(tbl) if err != nil { return nil, err } if attr.Value == nr.key.Resolve(tbl.pkgs[0].keyPool) { // TODO hard-coded pkg ref nattr.TypedValue = *val.data break } } case DataIntHex: nattr.TypedValue.Type = t for _, x := range strings.Split(attr.Value, "|") { for _, val := range nt.values { if val.name == 0x01000000 { continue } nr, err := val.name.Resolve(tbl) if err != nil { return nil, err } if x == nr.key.Resolve(tbl.pkgs[0].keyPool) { // TODO hard-coded pkg ref nattr.TypedValue.Value |= val.data.Value break } } } default: return nil, fmt.Errorf("unhandled data type for configuration %0#2x: %s", uint8(t), t) } } } } case xml.CharData: if s := poolTrim(string(tkn)); s != "" { cdt := &CharData{ NodeHeader: NodeHeader{ LineNumber: uint32(line), Comment: NoEntry, }, RawData: pool.ref(s), } el := bx.stack[len(bx.stack)-1] if el.head == nil { el.head = cdt } else if el.tail == nil { el.tail = cdt } else { return nil, fmt.Errorf("element head and tail already contain chardata") } } case xml.EndElement: if tkn.Name.Local == "manifest" { bx.Namespace.end = &Namespace{ NodeHeader: NodeHeader{ LineNumber: uint32(line), Comment: NoEntry, }, prefix: 0, uri: 0, } } n := len(bx.stack) var el *Element el, bx.stack = bx.stack[n-1], bx.stack[:n-1] if el.end != nil { return nil, fmt.Errorf("element end already exists") } el.end = &ElementEnd{ NodeHeader: NodeHeader{ LineNumber: uint32(line), Comment: NoEntry, }, NS: el.NS, Name: el.Name, } case xml.Comment, xml.ProcInst: // discard default: panic(fmt.Errorf("unhandled token type: %T %+v", tkn, tkn)) } } // pools appear to be sorted as follows: // * attribute names prefixed with android: // * "android", [schema-url], [empty-string] // * for each node: // * attribute names with no prefix // * node name // * attribute value if data type of name is DataString, DataAttribute, or 0x3e (an unknown) bx.Pool = new(Pool) var arecurse func(*Element) arecurse = func(el *Element) { for _, attr := range el.attrs { if attr.NS == NoEntry { continue } if attr.NS.Resolve(pool) == androidSchema { bx.Pool.strings = append(bx.Pool.strings, attr.Name.Resolve(pool)) } } for _, child := range el.Children { arecurse(child) } } for _, el := range bx.Children { arecurse(el) } // TODO encoding/xml does not enforce namespace prefix and manifest encoding in aapt // appears to ignore all other prefixes. Inserting this manually is not strictly correct // for the general case, but the effort to do otherwise currently offers nothing. bx.Pool.strings = append(bx.Pool.strings, "android", androidSchema) // there always appears to be an empty string located after schema, even if one is // not present in manifest. bx.Pool.strings = append(bx.Pool.strings, "") var brecurse func(*Element) brecurse = func(el *Element) { for _, attr := range el.attrs { if attr.NS == NoEntry { bx.Pool.strings = append(bx.Pool.strings, attr.Name.Resolve(pool)) } } bx.Pool.strings = append(bx.Pool.strings, el.Name.Resolve(pool)) for _, attr := range el.attrs { if attr.RawValue != NoEntry { bx.Pool.strings = append(bx.Pool.strings, attr.RawValue.Resolve(pool)) } else if attr.NS == NoEntry { bx.Pool.strings = append(bx.Pool.strings, fmt.Sprintf("%+v", attr.TypedValue.Value)) } } if el.head != nil { bx.Pool.strings = append(bx.Pool.strings, el.head.RawData.Resolve(pool)) } if el.tail != nil { bx.Pool.strings = append(bx.Pool.strings, el.tail.RawData.Resolve(pool)) } for _, child := range el.Children { brecurse(child) } } for _, el := range bx.Children { brecurse(el) } // do not eliminate duplicates until the entire slice has been composed. // consider <activity android:label="label" .../> // all attribute names come first followed by values; in such a case, the value "label" // would be a reference to the same "android:label" in the string pool which will occur // within the beginning of the pool where other attr names are located. bx.Pool.strings = asSet(bx.Pool.strings) // TODO consider cases of multiple declarations of the same attr name that should return error // before ever reaching this point. bx.Map = new(Map) for _, s := range bx.Pool.strings { ref, err := tbl.RefByName("attr/" + s) if err != nil { break // break after first non-ref as all strings after are also non-refs. } bx.Map.rs = append(bx.Map.rs, ref) } // resolve tmp pool refs to final pool refs // TODO drop this in favor of sort directly on Table var resolve func(el *Element) resolve = func(el *Element) { if el.NS != NoEntry { el.NS = bx.Pool.ref(el.NS.Resolve(pool)) el.end.NS = el.NS } el.Name = bx.Pool.ref(el.Name.Resolve(pool)) el.end.Name = el.Name for _, attr := range el.attrs { if attr.NS != NoEntry { attr.NS = bx.Pool.ref(attr.NS.Resolve(pool)) } attr.Name = bx.Pool.ref(attr.Name.Resolve(pool)) if attr.RawValue != NoEntry { attr.RawValue = bx.Pool.ref(attr.RawValue.Resolve(pool)) if attr.TypedValue.Type == DataString { attr.TypedValue.Value = uint32(attr.RawValue) } } } for _, child := range el.Children { resolve(child) } } for _, el := range bx.Children { resolve(el) } var asort func(*Element) asort = func(el *Element) { sort.Sort(byType(el.attrs)) sort.Sort(byNamespace(el.attrs)) sort.Sort(byName(el.attrs)) for _, child := range el.Children { asort(child) } } for _, el := range bx.Children { asort(el) } for i, s := range bx.Pool.strings { switch s { case androidSchema: bx.Namespace.uri = PoolRef(i) bx.Namespace.end.uri = PoolRef(i) case "android": bx.Namespace.prefix = PoolRef(i) bx.Namespace.end.prefix = PoolRef(i) } } return bx, nil }
func UnmarshalXML(r io.Reader) (*XML, error) { tbl, err := OpenTable() if err != nil { return nil, err } var nodes []xnode dec := xml.NewDecoder(r) bx := new(XML) for { tkn, err := dec.Token() if err != nil { if err == io.EOF { break } return nil, err } tkn = xml.CopyToken(tkn) switch tkn := tkn.(type) { case xml.StartElement: nodes = append(nodes, xnode{name: tkn.Name}) for _, attr := range tkn.Attr { if attr.Name.Space == toolsSchema || (attr.Name.Space == "xmlns" && attr.Name.Local == "tools") { continue // TODO can tbl be queried for schemas to determine validity instead? } att := xattr{attr, false} if attr.Name.Space == "" || attr.Name.Space == "xmlns" { att.bool = true } else if attr.Name.Space == androidSchema { // get type spec and value data type ref, err := tbl.RefByName("attr/" + attr.Name.Local) if err != nil { return nil, err } nt, err := ref.Resolve(tbl) if err != nil { return nil, err } if len(nt.values) == 0 { // TODO don't know if this can happen panic("TODO don't know how to handle empty values slice") } if len(nt.values) == 1 { val := nt.values[0] if val.data.Type != DataIntDec { panic("TODO only know how to handle DataIntDec type here") } t := DataType(val.data.Value) switch t { case DataString, DataAttribute, DataType(0x3e): // TODO identify 0x3e, in bootstrap.xml this is the native lib name // TODO why DataAttribute? confirm details of usage att.bool = true default: // TODO resolve other data types // fmt.Printf("unhandled data type %0#4x: %s\n", uint32(t), t) } } else { // attribute value must resolve to one of the values here // TODO resolve reference values and assure they match list of values here } } nodes[len(nodes)-1].attrs = append(nodes[len(nodes)-1].attrs, att) } case xml.CharData: if s := poolTrim(string(tkn)); s != "" { nodes[len(nodes)-1].cdata = append(nodes[len(nodes)-1].cdata, s) } case xml.EndElement, xml.Comment, xml.ProcInst: // discard default: panic(fmt.Errorf("unhandled token type: %T %+v", tkn, tkn)) } } bvc := xml.Attr{ Name: xml.Name{ Space: "", Local: "platformBuildVersionCode", }, Value: "15", } bvn := xml.Attr{ Name: xml.Name{ Space: "", Local: "platformBuildVersionName", }, Value: "4.0.3", } nodes[0].attrs = append(nodes[0].attrs, xattr{bvc, true}, xattr{bvn, true}) // pools appear to be sorted as follows: // * attribute names prefixed with android: // * "android", [schema-url], [empty-string] // * for each node: // * attribute names with no prefix // * node name // * attribute value if data type of name is DataString, DataAttribute, or 0x3e (an unknown) bx.Pool = new(Pool) for _, node := range nodes { for _, attr := range node.attrs { if attr.Name.Space == androidSchema { bx.Pool.strings = append(bx.Pool.strings, attr.Name.Local) } } } // TODO encoding/xml does not enforce namespace prefix and manifest encoding in aapt // appears to ignore all other prefixes. Inserting this manually is not strictly correct // for the general case, but the effort to do otherwise currently offers nothing. bx.Pool.strings = append(bx.Pool.strings, "android", androidSchema) // there always appears to be an empty string located after schema, even if one is // not present in manifest. bx.Pool.strings = append(bx.Pool.strings, "") for _, node := range nodes { for _, attr := range node.attrs { if attr.Name.Space == "" { bx.Pool.strings = append(bx.Pool.strings, attr.Name.Local) } } bx.Pool.strings = append(bx.Pool.strings, node.name.Local) for _, attr := range node.attrs { if attr.bool { bx.Pool.strings = append(bx.Pool.strings, attr.Value) } } for _, x := range node.cdata { bx.Pool.strings = append(bx.Pool.strings, x) } } // do not eliminate duplicates until the entire slice has been composed. // consider <activity android:label="label" .../> // all attribute names come first followed by values; in such a case, the value "label" // would be a reference to the same "android:label" in the string pool which will occur // within the beginning of the pool where other attr names are located. bx.Pool.strings = asSet(bx.Pool.strings) return bx, nil }
func setNode(opts *xmlbuilder.BuilderOpts, xmlTree xmlbuilder.XMLBuilder, tok xml.Token, nt tree.NodeType, ordrPos *int) { opts.Tok = xml.CopyToken(tok) opts.NodeType = nt opts.NodePos = *ordrPos *ordrPos++ }