func main() {
    xml := `<?xml version="1.0"?>
<!DOCTYPE doc [
  <!ELEMENT doc (src | dest)*>
  <!ELEMENT src EMPTY>
  <!ELEMENT dest EMPTY>
  <!ATTLIST src ref IDREF #IMPLIED>
  <!ATTLIST dest id ID #IMPLIED>
]>
<doc>
  <src ref="foo"/>
  <dest id="foo"/>
  <src ref="foo"/>
  <employee>
    <firstname>John</firstname>
    <lastname>Smith</lastname>
    <food type="dessert">Ice cream</food>
    <description>
      Born on <date lang="norwegian">03.03.99</date> ....
    </description>
  </employee>
</doc>
`
    doc, _ := gokogiri.ParseXml([]byte(xml))
    defer doc.Free()

    rootElement := doc.Root()
    printElementNames(rootElement)
}
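// printElementNames is called above but not defined in this snippet. A minimal
// recursive sketch, assuming only the gokogiri xml.Node interface (Name, Search):
// it prints an element's name and recurses into its element children, selected
// with the XPath "*" so text nodes are skipped.
func printElementNames(node xml.Node) {
    fmt.Println(node.Name())
    children, _ := node.Search("*") // "*" matches element children only
    for _, child := range children {
        printElementNames(child)
    }
}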
func fetchAndFormatArticles(sem *chan int, sub *Subscription, articleChannel *chan *[]Article, waitGroup *sync.WaitGroup) {
    resp, err := http.Get(sub.Url)
    if err != nil {
        log.Printf("Unable to fetch articles from %v | Error: %v ", sub.Url, err.Error())
        // TODO: Manage error. How?
        *sem <- 1
        waitGroup.Done()
        return
    }
    body, _ := ioutil.ReadAll(resp.Body)
    resp.Body.Close()

    doc, err := gokogiri.ParseXml(body)
    if err != nil {
        log.Println("Error parsing XML on feed with URL " + sub.Url)
        // Release the semaphore slot and mark this goroutine done even on a
        // parse error, so callers waiting on the WaitGroup do not block forever.
        *sem <- 1
        waitGroup.Done()
        return
    }
    defer doc.Free()
    doc.RecursivelyRemoveNamespaces()

    var articles []Article
    if doc.Root().Name() == "rss" {
        formatRSS(sub, doc, &articles)
    } else if doc.Root().Name() == "feed" {
        formatAtom(sub, doc, &articles)
    }

    *articleChannel <- &articles
    *sem <- 1
    waitGroup.Done()
}
func parseContainer(ef *epubFile) (*container, error) {
    c := &container{}

    rootpath := "META-INF/container.xml"
    rootfile := findZipFile(ef.r, rootpath)
    if rootfile == nil {
        return nil, NoEPUBError
    }

    fr, err := rootfile.Open()
    if err != nil {
        return nil, fmt.Errorf("could not open %s, %s", rootpath, err)
    }
    defer fr.Close()

    data, err := ioutil.ReadAll(fr)
    if err != nil {
        return nil, UnexpectedError
    }

    doc, err := gokogiri.ParseXml(data)
    if err != nil {
        return nil, InvalidXMLError
    }
    defer doc.Free()
    doc.RecursivelyRemoveNamespaces()

    res, _ := doc.Search("/container/rootfiles/rootfile")
    for _, node := range res {
        if node.Attr("media-type") == "application/oebps-package+xml" {
            c.OEBPSPackagePath = node.Attr("full-path")
        }
    }

    return c, nil
}
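// findZipFile is referenced above but not included in this snippet. A minimal
// sketch using the standard archive/zip package, assuming ef.r is a *zip.Reader
// over the EPUB archive: it returns the entry whose name matches exactly, or nil.
func findZipFile(r *zip.Reader, name string) *zip.File {
    for _, f := range r.File {
        if f.Name == name {
            return f
        }
    }
    return nil
}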
func main() {
    // content, _ := ioutil.ReadFile("index.html")
    // doc, _ := gokogiri.ParseHtml(content)
    content := `<?xml version="1.0"?>
<?xml-stylesheet href="catalog.xsl" type="text/xsl"?>
<!DOCTYPE catalog SYSTEM "catalog.dtd">
<catalog>
  <product description="Cardigan Sweater" product_image="cardigan.jpg">
    <catalog_item gender="Men's">
      <item_number>QWZ5671</item_number>
      <price>39.95</price>
      <size description="Medium">
        <color_swatch image="red_cardigan.jpg">Red</color_swatch>
        <color_swatch image="burgundy_cardigan.jpg">Burgundy</color_swatch>
      </size>
      <size description="Large">
        <color_swatch image="red_cardigan.jpg">Red</color_swatch>
        <color_swatch image="burgundy_cardigan.jpg">Burgundy</color_swatch>
      </size>
    </catalog_item>
    <catalog_item gender="Women's">
      <item_number>RRX9856</item_number>
      <price>42.50</price>
      <size description="Small">
        <color_swatch image="red_cardigan.jpg">Red</color_swatch>
        <color_swatch image="navy_cardigan.jpg">Navy</color_swatch>
        <color_swatch image="burgundy_cardigan.jpg">Burgundy</color_swatch>
      </size>
      <size description="Medium">
        <color_swatch image="red_cardigan.jpg">Red</color_swatch>
        <color_swatch image="navy_cardigan.jpg">Navy</color_swatch>
        <color_swatch image="burgundy_cardigan.jpg">Burgundy</color_swatch>
        <color_swatch image="black_cardigan.jpg">Black</color_swatch>
      </size>
      <size description="Large">
        <color_swatch image="navy_cardigan.jpg">Navy</color_swatch>
        <color_swatch image="black_cardigan.jpg">Black</color_swatch>
      </size>
      <size description="Extra Large">
        <color_swatch image="burgundy_cardigan.jpg">Burgundy</color_swatch>
        <color_swatch image="black_cardigan.jpg">Black</color_swatch>
      </size>
    </catalog_item>
  </product>
</catalog>`

    doc, _ := gokogiri.ParseXml([]byte(content))
    defer doc.Free()

    if len(os.Args) <= 1 {
        fmt.Println(doc.String())
    } else if len(os.Args) == 3 {
        example4(os.Args[1], os.Args[2])
    } else {
        fmt.Println("Usage: lx2_xpath-search1 xpath-expr new-value")
    }
}
func processXmlString(unparsedXml []byte) {
    doc, err := gokogiri.ParseXml(unparsedXml)
    if err != nil {
        log.Println("Error parsing file:", err)
        return
    }
    defer doc.Free()

    firstNode, err := doc.Node.Search("//node")
    if err != nil || len(firstNode) == 0 {
        log.Println("No <node> elements found:", err)
        return
    }

    row := "|" // empty pipe-separated row, starter
    processNode(firstNode[0], row)
}
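// processNode is not shown in this snippet. A purely hypothetical sketch of one
// way it could work: recurse into element children and, for each leaf element,
// print the accumulated pipe-separated row with the element's name and text.
func processNode(node xml.Node, row string) {
    children, _ := node.Search("*") // element children only
    if len(children) == 0 {
        // leaf element: emit its name and content as one cell of the row
        fmt.Println(row + node.Name() + "=" + node.Content() + "|")
        return
    }
    for _, child := range children {
        processNode(child, row)
    }
}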
func parseOEBPSPackage(ef *epubFile, c *container) error {
    epub := ef.data

    file := findZipFile(ef.r, c.OEBPSPackagePath)
    if file == nil {
        return UnexpectedError
    }

    fr, err := file.Open()
    if err != nil {
        return fmt.Errorf("could not open %s, %s", c.OEBPSPackagePath, err)
    }
    defer fr.Close()

    data, err := ioutil.ReadAll(fr)
    if err != nil {
        return UnexpectedError
    }

    doc, err := gokogiri.ParseXml(data)
    if err != nil {
        return InvalidXMLError
    }
    defer doc.Free()
    doc.RecursivelyRemoveNamespaces()

    epub.Version = doc.Root().Attr("version")

    res, _ := doc.Search("/package/metadata")
    if len(res) != 1 {
        return NoEPUBError
    }
    mn := res[0]

    creators, _ := mn.Search("creator")
    contributors, _ := mn.Search("contributor")

    epub.Titles = parseTitles(mn)
    epub.Creators = parsePeople(creators)
    epub.Contributors = parsePeople(contributors)
    epub.Subjects = parseSubjects(mn)
    epub.Description = parseDescription(mn)
    epub.Publisher = parsePublisher(mn)
    epub.Dates = parseDates(mn)
    epub.Identifiers = parseIdentifiers(mn)
    epub.Source = parseSource(mn)
    epub.Languages = parseLanguages(mn)
    epub.Rights = parseRights(mn)

    return nil
}
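// parseTitles is one of several helpers referenced above but not included here.
// A minimal sketch, assuming Titles is a plain []string: it collects the text
// content of every <title> child of the <metadata> node. The other parse*
// helpers would follow the same Search-then-Content pattern.
func parseTitles(metadata xml.Node) []string {
    nodes, _ := metadata.Search("title")
    titles := make([]string, 0, len(nodes))
    for _, n := range nodes {
        titles = append(titles, n.Content())
    }
    return titles
}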
func main() {
    doc, _ := gokogiri.ParseXml([]byte(a))
    defer doc.Free()

    xp := doc.DocXPathCtx()
    xp.RegisterNamespace("ns", "http://example.com/this")

    x := xpath.Compile("/ns:NodeA/ns:NodeB")
    groups, err := doc.Search(x)
    if err != nil {
        fmt.Println(err)
    }

    for i, group := range groups {
        fmt.Println(i, group.Content())
    }
}
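// The variable a is not defined in this snippet. A minimal document, assumed
// here as a package-level constant, that the namespaced XPath above would match:
const a = `<?xml version="1.0"?>
<NodeA xmlns="http://example.com/this">
  <NodeB>first</NodeB>
  <NodeB>second</NodeB>
</NodeA>`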
func extract(file string, xpath string) {
    xml, err := ioutil.ReadFile(file)
    if err != nil {
        fmt.Fprintf(os.Stderr, "gox: Could not read %s\n", file)
        return
    }

    doc, err := gokogiri.ParseXml(xml)
    if err != nil {
        fmt.Fprintf(os.Stderr, "gox: Could not parse %s\n", file)
        return
    }
    defer doc.Free()

    nodes, _ := doc.Search(xpath)
    for n := range nodes {
        fmt.Println(nodes[n].String())
    }
}
func main() {
    doc, _ := gokogiri.ParseXml([]byte(file))
    defer doc.Free()

    nodes, _ := doc.Search("/foo")
    for n := range nodes {
        fmt.Println(nodes[n].Name())
        subnodes, _ := nodes[n].Search("bar")
        for s := range subnodes {
            fmt.Println(subnodes[s].Name())
        }
    }

    fmt.Println("---")

    nodes, _ = doc.Search("bar")
    for n := range nodes {
        fmt.Println(nodes[n].Name())
    }
}
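// file is not defined in this snippet. An example document, assumed here, that
// illustrates the searches above: the absolute path /foo matches the root, the
// relative "bar" evaluated from a foo node matches its children, while the final
// doc.Search("bar") is relative to the document node and should match nothing.
const file = `<foo>
  <bar>one</bar>
  <bar>two</bar>
</foo>`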
func findRSSTitle(rssUrl string) (string, error) {
    res, err := http.Get(rssUrl)
    if err != nil {
        return "", err
    }
    body, err := ioutil.ReadAll(res.Body)
    if err != nil {
        return "", err
    }
    res.Body.Close()

    doc, err := gokogiri.ParseXml(body)
    if err != nil {
        return "", err
    }
    defer doc.Free()
    doc.RecursivelyRemoveNamespaces()

    nodes, err := doc.Search("//title")
    if err != nil {
        return "", err
    }
    if len(nodes) > 0 {
        return nodes[0].Content(), nil
    }
    return "", nil
}
func findRSSURL(rawurl string) (string, error) {
    res, err := http.Get(rawurl)
    if err != nil {
        return "", err
    }
    body, err := ioutil.ReadAll(res.Body)
    if err != nil {
        return "", err
    }
    res.Body.Close()

    doc, err := gokogiri.ParseXml(body)
    if err == nil {
        defer doc.Free()
    }

    if err != nil || (doc.Root().Name() != "rss" && doc.Root().Name() != "feed") {
        // Not a feed document: parse it as HTML and look for a <link> that
        // points at the feed.
        htmlDoc, _ := gokogiri.ParseHtml(body)
        defer htmlDoc.Free()
        htmlDoc.RecursivelyRemoveNamespaces()

        nodes, err := htmlDoc.Search("//link")
        if err != nil {
            return "", err
        }
        for _, v := range nodes {
            rel := v.Attribute("rel")
            if rel == nil {
                continue
            }
            if rel.Value() == "alternate" || rel.Value() == "feed" {
                u, _ := url.Parse(v.Attribute("href").Value())
                if u.IsAbs() {
                    return u.String(), nil
                }
                baseU, _ := url.Parse(rawurl)
                u.Scheme = baseU.Scheme
                u.Host = baseU.Host
                return u.String(), nil
            }
        }
    } else {
        return res.Request.URL.String(), nil
    }
    return "", errors.New("Feed URL not found")
}
func (s *EposServer) ServeHTTP(rw http.ResponseWriter, req *http.Request) {
    // send origin headers
    if origin := req.Header.Get("Origin"); origin != "" {
        rw.Header().Set("Access-Control-Allow-Origin", origin)
        rw.Header().Set("Access-Control-Allow-Methods", "POST, GET, OPTIONS, PUT, DELETE")
        rw.Header().Set("Access-Control-Allow-Headers", "Accept, Content-Type, Content-Length, Accept-Encoding, X-CSRF-Token, Authorization, If-Modified-Since, SOAPAction")
    }

    // stop if it's an OPTIONS (preflight) request
    if req.Method == "OPTIONS" {
        log.Printf("OPTIONS %s", req.URL)
        return
    }

    // handle crappy soap action
    if req.Method == "POST" {
        // grab posted body
        data, _ := ioutil.ReadAll(req.Body)
        log.Printf("POST %s:\n%s\n\n", req.URL, string(data))

        // parse xml with gokogiri
        doc, _ := gokogiri.ParseXml(data)
        defer doc.Free()

        // load print nodes from xml doc
        epos_nodes, err := getEposNodes(doc)
        if err != nil {
            rw.WriteHeader(503)
            log.Println(err)
            return
        }

        // init printer
        s.printer.Init()

        // loop over nodes
        for _, en := range epos_nodes {
            // grab name and inner text
            name := en.Name()
            content := en.Content()

            // grab parameters
            params := make(map[string]string)
            for _, attr := range en.Attributes() {
                params[attr.Name()] = attr.Value()
            }

            // write data to printer
            s.printer.WriteNode(name, params, content)
        }

        // end
        s.printer.End()

        // flush writer
        s.printerWriter.Flush()

        //rw.WriteHeader(402)

        // write soap response
        writeSoapResponse(rw, req, "")
        return
    }

    // force an error for everything else
    rw.WriteHeader(403)

    // let Gorilla work
    s.r.ServeHTTP(rw, req)
}
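// getEposNodes is not shown above. A hypothetical sketch of how the print
// command elements might be pulled out of the posted SOAP envelope: the XPath
// uses local-name() so it works regardless of namespace prefixes, and the
// element name "epos-print" is an assumption about the payload structure.
func getEposNodes(doc *xml.XmlDocument) ([]xml.Node, error) {
    nodes, err := doc.Search("//*[local-name()='epos-print']/*")
    if err != nil {
        return nil, err
    }
    if len(nodes) == 0 {
        return nil, fmt.Errorf("no ePOS print nodes found in request body")
    }
    return nodes, nil
}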
func rssCloud(w http.ResponseWriter, r *http.Request) {
    logr.Debugln("Yay a cloud request!")

    if r.Method != "POST" {
        w.Header().Set("Allow", "POST")
        http.Error(w, "POST is required", http.StatusMethodNotAllowed)
        return
    }

    // Read the whole request body; a single Read is not guaranteed to fill the buffer.
    bodyBytes := make([]byte, r.ContentLength)
    _, err := io.ReadFull(r.Body, bodyBytes)
    if err != nil {
        logr.Errln("Could not read request body:", err.Error())
        http.Error(w, "Could not read body: "+err.Error(), http.StatusInternalServerError)
        return
    }

    requestDoc, err := gokogiri.ParseXml(bodyBytes)
    if err != nil {
        writeXmlRpcError(w, err)
        return
    }

    request := new(RssCloudRequest)
    err = request.Unpack(requestDoc)
    if err != nil {
        writeXmlRpcError(w, err)
        return
    }

    hostname := r.Header.Get("X-Forwarded-For")
    if hostname == "" {
        hostname = r.RemoteAddr
    } else {
        hostnames := strings.SplitN(hostname, ",", 2)
        hostname = hostnames[0]
    }
    // X-Forwarded-For values usually carry no port, so fall back to the raw
    // value when SplitHostPort fails.
    host, _, err := net.SplitHostPort(hostname)
    if err != nil {
        host = hostname
    }
    request.Host = host

    if request.RequestMethodName != "cloud.notify" {
        writeXmlRpcError(w, fmt.Errorf("Unknown method %s", request.RequestMethodName))
        return
    }
    if !request.IsXmlRpc {
        writeXmlRpcError(w, fmt.Errorf("Only XML-RPC is supported"))
        return
    }
    // TODO: use https as appropriate here? er, does river2 support cloud endpoints on HTTPS?
    if request.FeedURL != fmt.Sprintf("http://%s/rss", r.Host) {
        writeXmlRpcError(w, fmt.Errorf("RSS URL %s is not a feed managed here", request.FeedURL))
        return
    }

    logr.Debugln("Yay, asked to call back to http://", request.Host, ":", request.Port, request.Path, "with method", request.MethodName, "!")

    url, _ := url.Parse("/")
    if request.Port == 443 {
        url.Scheme = "https"
    } else {
        url.Scheme = "http"
    }
    if request.Port == 80 || request.Port == 443 {
        url.Host = request.Host
    } else {
        url.Host = net.JoinHostPort(request.Host, strconv.Itoa(int(request.Port)))
    }
    url.Path = request.Path
    urlString := url.String()

    rssCloud, err := RssCloudByURL(urlString)
    if err == sql.ErrNoRows {
        // That's cool.
    } else if err != nil {
        logr.Errln("Error loading rsscloud for URL", urlString, ":", err.Error())
        http.Error(w, "error looking for rsscloud for URL "+urlString, http.StatusInternalServerError)
        return
    }

    if rssCloud == nil {
        rssCloud = NewRssCloud()
        rssCloud.URL = urlString
    }
    rssCloud.Method = request.MethodName
    // Subscribe until 25 hours from now.
    rssCloud.SubscribedUntil = time.Now().Add(time.Duration(25) * time.Hour).UTC()

    err = rssCloud.Save()
    if err != nil {
        logr.Errln("Error saving rsscloud for URL", urlString, ":", err.Error())
        http.Error(w, "error saving rsscloud for URL "+urlString, http.StatusInternalServerError)
        return
    }

    output := `<?xml version="1.0" encoding="UTF-8"?>
<methodResponse>
  <params>
    <param>
      <value><boolean>1</boolean></value>
    </param>
  </params>
</methodResponse>`
    w.Header().Set("Content-Type", "text/xml")
    w.Header().Set("Content-Length", strconv.Itoa(len(output)))
    w.Write([]byte(output))
}