Example #1
0
// main parses an inline XML document (which carries an internal DTD subset)
// and passes its root element to printElementNames.
//
// The program panics with the parser's error if the document cannot be parsed.
func main() {
	xml := `<?xml version="1.0"?>
<!DOCTYPE doc [
<!ELEMENT doc (src | dest)*>
<!ELEMENT src EMPTY>
<!ELEMENT dest EMPTY>
<!ATTLIST src ref IDREF #IMPLIED>
<!ATTLIST dest id ID #IMPLIED>
]>
<doc>
  <src ref="foo"/>
  <dest id="foo"/>
  <src ref="foo"/>
  <employee>
    <firstname>John</firstname>
    <lastname>Smith</lastname>
    <food type="dessert">Ice cream</food>
    <description>
      Born on <date lang="norwegian">03.03.99</date> ....
    </description>
  </employee>
</doc>
`
	doc, err := gokogiri.ParseXml([]byte(xml))
	if err != nil {
		// Check before deferring Free so that no method is ever called on a
		// document that failed to parse.
		panic(err)
	}
	defer doc.Free()

	rootElement := doc.Root()
	printElementNames(rootElement)
}
Example #2
0
// fetchAndFormatArticles downloads the feed at sub.Url, parses it as XML and
// sends a pointer to the resulting article slice on *articleChannel.
//
// The root element name selects the formatter: "rss" uses formatRSS and
// "feed" uses formatAtom. Any other (or missing) root yields an empty slice.
//
// On every return path the function sends 1 on *sem and then calls
// waitGroup.Done, so callers waiting on either are never left hanging. When
// fetching, reading or parsing fails the error is logged and nothing is sent
// on *articleChannel.
func fetchAndFormatArticles(sem *chan int, sub *Subscription, articleChannel *chan *[]Article, waitGroup *sync.WaitGroup) {
	// Deferred calls run last-in-first-out: the semaphore send happens first,
	// then Done, matching the order used by the straight-line code before.
	defer waitGroup.Done()
	defer func() { *sem <- 1 }()

	resp, err := http.Get(sub.Url)
	if err != nil {
		log.Printf("Unable to fetch articles from %v | Error: %v ", sub.Url, err.Error()) // TODO: Manage error. How?
		return
	}
	body, err := ioutil.ReadAll(resp.Body)
	resp.Body.Close()
	if err != nil {
		log.Printf("Unable to read articles from %v | Error: %v ", sub.Url, err.Error())
		return
	}

	doc, err := gokogiri.ParseXml(body)
	if err != nil {
		log.Println("Error parsing XML on feed with URL " + sub.Url)
		return
	}
	defer doc.Free()
	doc.RecursivelyRemoveNamespaces()

	var articles []Article
	// Guard against a document without a root element (presumably possible
	// for an empty response body) before asking for its name.
	if root := doc.Root(); root != nil {
		switch root.Name() {
		case "rss":
			formatRSS(sub, doc, &articles)
		case "feed":
			formatAtom(sub, doc, &articles)
		}
	}
	*articleChannel <- &articles
}
Example #3
0
// parseContainer locates META-INF/container.xml inside the archive behind
// ef.r, parses it, and records the full-path attribute of the rootfile whose
// media-type is "application/oebps-package+xml".
//
// It returns NoEPUBError when the archive has no container file, a formatted
// error when the entry cannot be opened, UnexpectedError when it cannot be
// read, and InvalidXMLError when it is not parseable XML. If several rootfile
// elements match, the last one wins; if none match, OEBPSPackagePath is left
// at its zero value.
func parseContainer(ef *epubFile) (*container, error) {
	const containerPath = "META-INF/container.xml"

	entry := findZipFile(ef.r, containerPath)
	if entry == nil {
		return nil, NoEPUBError
	}

	reader, err := entry.Open()
	if err != nil {
		return nil, fmt.Errorf("could not open %s, %s", containerPath, err)
	}
	defer reader.Close()

	raw, err := ioutil.ReadAll(reader)
	if err != nil {
		return nil, UnexpectedError
	}

	doc, err := gokogiri.ParseXml(raw)
	if err != nil {
		return nil, InvalidXMLError
	}
	defer doc.Free()
	// Strip namespaces so the plain, prefix-free XPath below can match.
	doc.RecursivelyRemoveNamespaces()

	result := &container{}

	// A failed search is treated the same as "no rootfile entries".
	rootfiles, _ := doc.Search("/container/rootfiles/rootfile")
	for i := range rootfiles {
		if rootfiles[i].Attr("media-type") != "application/oebps-package+xml" {
			continue
		}
		result.OEBPSPackagePath = rootfiles[i].Attr("full-path")
	}

	return result, nil
}
Example #4
0
// main parses an inline product catalog and then acts on the command line:
//
//   - no arguments: print the parsed document;
//   - exactly two arguments: pass them to example4;
//   - anything else: print a usage line.
//
// If the catalog cannot be parsed, the error is written to standard error and
// the program exits with status 1.
func main() {
	// content, _ := ioutil.ReadFile("index.html")
	//doc, _ := gokogiri.ParseHtml(content)
	content := `<?xml version="1.0"?>
<?xml-stylesheet href="catalog.xsl" type="text/xsl"?>
<!DOCTYPE catalog SYSTEM "catalog.dtd">
<catalog>
   <product description="Cardigan Sweater" product_image="cardigan.jpg">
      <catalog_item gender="Men's">
         <item_number>QWZ5671</item_number>
         <price>39.95</price>
         <size description="Medium">
            <color_swatch image="red_cardigan.jpg">Red</color_swatch>
            <color_swatch image="burgundy_cardigan.jpg">Burgundy</color_swatch>
         </size>
         <size description="Large">
            <color_swatch image="red_cardigan.jpg">Red</color_swatch>
            <color_swatch image="burgundy_cardigan.jpg">Burgundy</color_swatch>
         </size>
      </catalog_item>
      <catalog_item gender="Women's">
         <item_number>RRX9856</item_number>
         <price>42.50</price>
         <size description="Small">
            <color_swatch image="red_cardigan.jpg">Red</color_swatch>
            <color_swatch image="navy_cardigan.jpg">Navy</color_swatch>
            <color_swatch image="burgundy_cardigan.jpg">Burgundy</color_swatch>
         </size>
         <size description="Medium">
            <color_swatch image="red_cardigan.jpg">Red</color_swatch>
            <color_swatch image="navy_cardigan.jpg">Navy</color_swatch>
            <color_swatch image="burgundy_cardigan.jpg">Burgundy</color_swatch>
            <color_swatch image="black_cardigan.jpg">Black</color_swatch>
         </size>
         <size description="Large">
            <color_swatch image="navy_cardigan.jpg">Navy</color_swatch>
            <color_swatch image="black_cardigan.jpg">Black</color_swatch>
         </size>
         <size description="Extra Large">
            <color_swatch image="burgundy_cardigan.jpg">Burgundy</color_swatch>
            <color_swatch image="black_cardigan.jpg">Black</color_swatch>
         </size>
      </catalog_item>
   </product>
</catalog>`
	doc, err := gokogiri.ParseXml([]byte(content))
	if err != nil {
		// Exit before any defer is registered, so nothing is skipped by
		// os.Exit and no method is called on an unparsed document.
		fmt.Fprintln(os.Stderr, "could not parse catalog XML:", err)
		os.Exit(1)
	}
	defer doc.Free()

	switch argc := len(os.Args); {
	case argc <= 1:
		fmt.Println(doc.String())
	case argc == 3:
		example4(os.Args[1], os.Args[2])
	default:
		fmt.Println("Usage: lx2_xpath-search1 xpath-expr new-value")
	}
}
Example #5
0
// processXmlString parses unparsedXml, searches the document for elements
// matching the XPath "//node" and hands the first match to processNode
// together with a "|" starter row.
//
// Parse and search failures, as well as documents without any match, are
// logged and otherwise ignored. The parsed document is freed when the function
// returns.
// NOTE(review): this assumes processNode does not keep node references alive
// after it returns — confirm against its implementation.
func processXmlString(unparsedXml []byte) {
	doc, err := gokogiri.ParseXml(unparsedXml)
	if err != nil {
		log.Println("Error parsing file:", err)
		return
	}
	defer doc.Free()

	nodes, err := doc.Node.Search("//node")
	if err != nil {
		log.Println("Error searching for nodes:", err)
		return
	}
	if len(nodes) == 0 {
		// Indexing an empty result would panic; report it instead.
		log.Println("No node elements found")
		return
	}

	row := "|" // empty pipe separated row, starter
	processNode(nodes[0], row)
}
Example #6
0
// parseOEBPSPackage reads the package document at c.OEBPSPackagePath from the
// archive behind ef.r and fills ef.data with its version attribute and the
// metadata extracted from the single /package/metadata element.
//
// Errors:
//   - UnexpectedError when the package file is missing from the archive or
//     cannot be read;
//   - a formatted error when the archive entry cannot be opened;
//   - InvalidXMLError when the file is not parseable XML or has no root
//     element;
//   - NoEPUBError unless exactly one /package/metadata element exists.
//
// The version is stored on ef.data before the metadata lookup, so it may be
// set even when NoEPUBError is returned.
func parseOEBPSPackage(ef *epubFile, c *container) error {
	epub := ef.data

	file := findZipFile(ef.r, c.OEBPSPackagePath)
	if file == nil {
		return UnexpectedError
	}

	fr, err := file.Open()
	if err != nil {
		return fmt.Errorf("could not open %s, %s", c.OEBPSPackagePath, err)
	}
	defer fr.Close()

	data, err := ioutil.ReadAll(fr)
	if err != nil {
		return UnexpectedError
	}
	doc, err := gokogiri.ParseXml(data)
	if err != nil {
		return InvalidXMLError
	}
	defer doc.Free()
	// Strip namespaces so the prefix-free XPath expressions below can match.
	doc.RecursivelyRemoveNamespaces()

	// A document without a root element (presumably what an empty file
	// produces) must not be dereferenced.
	root := doc.Root()
	if root == nil {
		return InvalidXMLError
	}
	epub.Version = root.Attr("version")

	// A failed search leaves res empty and is reported as NoEPUBError below.
	res, _ := doc.Search("/package/metadata")
	if len(res) != 1 {
		return NoEPUBError
	}
	mn := res[0]

	// Failed searches are treated as "no such elements".
	creators, _ := mn.Search("creator")
	contributors, _ := mn.Search("contributor")

	epub.Titles = parseTitles(mn)
	epub.Creators = parsePeople(creators)
	epub.Contributors = parsePeople(contributors)
	epub.Subjects = parseSubjects(mn)
	epub.Description = parseDescription(mn)
	epub.Publisher = parsePublisher(mn)
	epub.Dates = parseDates(mn)
	epub.Identifiers = parseIdentifiers(mn)
	epub.Source = parseSource(mn)
	epub.Languages = parseLanguages(mn)
	epub.Rights = parseRights(mn)

	return nil
}
Example #7
0
// main parses the XML held in a, registers the prefix "ns" for the namespace
// "http://example.com/this" on the document's XPath context, and prints the
// index and content of every node matching /ns:NodeA/ns:NodeB.
//
// Parse and search errors are printed and end the program early.
func main() {
	doc, err := gokogiri.ParseXml([]byte(a))
	if err != nil {
		// Report before deferring Free so no method runs on an unparsed
		// document.
		fmt.Println(err)
		return
	}
	defer doc.Free()

	xp := doc.DocXPathCtx()
	xp.RegisterNamespace("ns", "http://example.com/this")

	x := xpath.Compile("/ns:NodeA/ns:NodeB")
	groups, err := doc.Search(x)
	if err != nil {
		fmt.Println(err)
		return
	}
	for i, group := range groups {
		fmt.Println(i, group.Content())
	}
}
Example #8
0
File: gox.go Project: jcf/gox
// extract reads the XML file at path file, evaluates the XPath expression
// xpath against it and prints each matching node on its own line.
//
// Failures to read, parse or search are reported on standard error, after
// which the function returns without printing any nodes.
func extract(file string, xpath string) {
	xml, err := ioutil.ReadFile(file)
	if err != nil {
		fmt.Fprintf(os.Stderr, "gox: Could not read %s\n", file)
		return
	}

	doc, err := gokogiri.ParseXml(xml)
	if err != nil {
		fmt.Fprintf(os.Stderr, "gox: Could not parse %s\n", file)
		return
	}
	// Only defer Free once the document is known to be valid.
	defer doc.Free()

	nodes, err := doc.Search(xpath)
	if err != nil {
		fmt.Fprintf(os.Stderr, "gox: Could not evaluate %s on %s\n", xpath, file)
		return
	}
	for _, node := range nodes {
		fmt.Println(node.String())
	}
}
Example #9
0
// main contrasts XPath searches run from the document with searches run from
// a node. It prints the name of every node matching "/foo" followed by the
// names of that node's "bar" matches, then a "---" separator, then the names
// of the nodes matching "bar" when searched from the document itself.
//
// Parse and search errors are printed and end the program early.
func main() {
	doc, err := gokogiri.ParseXml([]byte(file))
	if err != nil {
		fmt.Println(err)
		return
	}
	defer doc.Free()

	nodes, err := doc.Search("/foo")
	if err != nil {
		fmt.Println(err)
		return
	}
	for _, node := range nodes {
		fmt.Println(node.Name())
		subnodes, err := node.Search("bar")
		if err != nil {
			fmt.Println(err)
			return
		}
		for _, sub := range subnodes {
			fmt.Println(sub.Name())
		}
	}

	fmt.Println("---")

	nodes, err = doc.Search("bar")
	if err != nil {
		fmt.Println(err)
		return
	}
	for _, node := range nodes {
		fmt.Println(node.Name())
	}
}
Example #10
0
// findRSSTitle fetches rssUrl, parses the response as XML and returns the
// content of the first element matching "//title" after namespaces have been
// stripped.
//
// It returns an empty string and a nil error when the document contains no
// title element. Fetch, read, parse and search failures are returned as-is.
func findRSSTitle(rssUrl string) (string, error) {
	res, err := http.Get(rssUrl)
	if err != nil {
		return "", err
	}
	// Deferred so the body is also closed when reading fails.
	defer res.Body.Close()

	body, err := ioutil.ReadAll(res.Body)
	if err != nil {
		return "", err
	}

	doc, err := gokogiri.ParseXml(body)
	if err != nil {
		// Return before touching the document; it is not usable here.
		return "", err
	}
	defer doc.Free()
	doc.RecursivelyRemoveNamespaces()

	nodes, err := doc.Search("//title")
	if err != nil {
		return "", err
	}
	if len(nodes) > 0 {
		return nodes[0].Content(), nil
	}
	return "", nil
}
Example #11
0
// findRSSURL discovers the feed URL for rawurl.
//
// The response is first parsed as XML: if its root element is named "rss" or
// "feed", rawurl already is a feed and the final request URL (after any
// redirects) is returned. Otherwise the body is parsed as HTML and the href of
// the first <link> whose rel attribute is "alternate" or "feed" is returned.
// A relative href is resolved against rawurl.
//
// Links without an href, or with an href that cannot be parsed, are skipped.
// An error is returned when fetching, reading, HTML parsing or searching
// fails, or when no suitable link exists ("Feed URL not found").
func findRSSURL(rawurl string) (string, error) {
	res, err := http.Get(rawurl)
	if err != nil {
		return "", err
	}
	// Deferred so the body is also closed when reading fails.
	defer res.Body.Close()

	body, err := ioutil.ReadAll(res.Body)
	if err != nil {
		return "", err
	}

	// An XML parse failure is expected for ordinary HTML pages, so it is not
	// an error here: it just means we fall through to the HTML path. The
	// document is only touched when parsing succeeded.
	if xmlDoc, xmlErr := gokogiri.ParseXml(body); xmlErr == nil {
		defer xmlDoc.Free()
		if root := xmlDoc.Root(); root != nil {
			if name := root.Name(); name == "rss" || name == "feed" {
				return res.Request.URL.String(), nil
			}
		}
	}

	htmlDoc, err := gokogiri.ParseHtml(body)
	if err != nil {
		return "", err
	}
	defer htmlDoc.Free()
	htmlDoc.RecursivelyRemoveNamespaces()

	links, err := htmlDoc.Search("//link")
	if err != nil {
		return "", err
	}
	for _, link := range links {
		// Attr yields a plain string, so links lacking the attribute cannot
		// cause a nil dereference the way Attribute(...).Value() could.
		rel := link.Attr("rel")
		if rel != "alternate" && rel != "feed" {
			continue
		}
		href := link.Attr("href")
		if href == "" {
			continue
		}
		u, err := url.Parse(href)
		if err != nil {
			continue
		}
		if u.IsAbs() {
			return u.String(), nil
		}
		base, err := url.Parse(rawurl)
		if err != nil {
			return "", err
		}
		// ResolveReference applies the standard relative-reference rules, so
		// path-relative and scheme-relative hrefs resolve correctly instead
		// of merely inheriting the base scheme and host.
		return base.ResolveReference(u).String(), nil
	}
	return "", errors.New("Feed URL not found")
}
Example #12
0
// ServeHTTP implements http.Handler for the ePOS print endpoint.
//
// Behaviour by request method:
//   - any request with an Origin header gets permissive CORS response headers
//     echoing that origin;
//   - OPTIONS is logged and answered with no further processing;
//   - POST bodies are parsed as XML, the nodes returned by getEposNodes are
//     written to the printer one by one (name, attributes, text content), the
//     printer writer is flushed and a SOAP response is written;
//   - every other method gets status 403 written before the request is passed
//     to the embedded router s.r.
//
// A POST whose body cannot be read or parsed is answered with 400; a failure
// in getEposNodes is answered with 503. Both are logged and never terminate
// the process.
func (s *EposServer) ServeHTTP(rw http.ResponseWriter, req *http.Request) {
	// send origin headers
	if origin := req.Header.Get("Origin"); origin != "" {
		rw.Header().Set("Access-Control-Allow-Origin", origin)
		rw.Header().Set("Access-Control-Allow-Methods", "POST, GET, OPTIONS, PUT, DELETE")
		rw.Header().Set("Access-Control-Allow-Headers", "Accept, Content-Type, Content-Length, Accept-Encoding, X-CSRF-Token, Authorization, If-Modified-Since, SOAPAction")
	}

	// stop if its options
	if req.Method == "OPTIONS" {
		log.Printf("OPTIONS %s", req.URL)
		return
	}

	// handle crappy soap action
	if req.Method == "POST" {
		// grab posted body
		data, err := ioutil.ReadAll(req.Body)
		if err != nil {
			log.Printf("POST %s: could not read body: %v", req.URL, err)
			rw.WriteHeader(http.StatusBadRequest)
			return
		}
		log.Printf("POST %s:\n%s\n\n", req.URL, string(data))

		// parse xml with gokogiri; only defer Free once parsing succeeded
		doc, err := gokogiri.ParseXml(data)
		if err != nil {
			log.Printf("POST %s: could not parse body: %v", req.URL, err)
			rw.WriteHeader(http.StatusBadRequest)
			return
		}
		defer doc.Free()

		// load print nodes from xml doc
		eposNodes, err := getEposNodes(doc)
		if err != nil {
			rw.WriteHeader(503)
			// Log only: a bad request must not bring the whole server down.
			log.Println(err)
			return
		}

		// init printer
		s.printer.Init()

		// loop over nodes
		for _, en := range eposNodes {
			// grab name and inner text
			name := en.Name()
			content := en.Content()

			// grab parameters
			params := make(map[string]string)
			for _, attr := range en.Attributes() {
				params[attr.Name()] = attr.Value()
			}

			// write data to printer
			s.printer.WriteNode(name, params, content)
		}

		// end
		s.printer.End()

		// flush writer
		s.printerWriter.Flush()

		//rw.WriteHeader(402)
		// write soap response
		writeSoapResponse(rw, req, "")

		return
	}

	// force an error for everything else
	rw.WriteHeader(403)

	// Lets Gorilla work
	s.r.ServeHTTP(rw, req)
}
Example #13
0
// rssCloud handles an rssCloud subscription request sent as XML-RPC.
//
// Only POST is accepted. The body is parsed and unpacked into an
// RssCloudRequest, which must call the "cloud.notify" method over XML-RPC for
// the feed served at http://<request host>/rss. The callback URL is built from
// the caller's address and the requested port and path; the matching RssCloud
// record is loaded (or created when none exists), its method and a 25-hour
// subscription expiry are stored, and a boolean-true XML-RPC response is
// written.
//
// The caller's address is the first entry of X-Forwarded-For when that header
// is present, otherwise r.RemoteAddr.
//
// Responses on failure:
//   - 405 for non-POST requests;
//   - 411 when the request has no known Content-Length, 400 when the body is
//     empty, 413 when it exceeds maxBodyBytes;
//   - 500 when the body cannot be read in full or the record cannot be loaded
//     or saved;
//   - an XML-RPC fault (via writeXmlRpcError) for parse, unpack and validation
//     errors.
func rssCloud(w http.ResponseWriter, r *http.Request) {
	logr.Debugln("Yay a cloud request!")

	if r.Method != "POST" {
		w.Header().Set("Allow", "POST")
		http.Error(w, "POST is required", http.StatusMethodNotAllowed)
		return
	}

	// maxBodyBytes caps the buffer allocated from the client-supplied
	// Content-Length. NOTE(review): 1 MiB is assumed to be far above any real
	// cloud.notify request — adjust if larger payloads are legitimate.
	const maxBodyBytes = 1 << 20
	switch {
	case r.ContentLength < 0:
		// Unknown length (-1) cannot size the buffer below.
		http.Error(w, "Content-Length is required", http.StatusLengthRequired)
		return
	case r.ContentLength == 0:
		http.Error(w, "Request body is empty", http.StatusBadRequest)
		return
	case r.ContentLength > maxBodyBytes:
		http.Error(w, "Request body too large", http.StatusRequestEntityTooLarge)
		return
	}

	// A single Read may return fewer bytes than requested, and may return an
	// error (such as end-of-file) together with the final bytes. Keep reading
	// until the buffer is full and only fail if it cannot be filled.
	bodyBytes := make([]byte, r.ContentLength)
	for filled := 0; filled < len(bodyBytes); {
		n, err := r.Body.Read(bodyBytes[filled:])
		filled += n
		if err != nil && filled < len(bodyBytes) {
			logr.Errln("Could not read request body:", err.Error())
			http.Error(w, "Could not read body: "+err.Error(), http.StatusInternalServerError)
			return
		}
	}

	requestDoc, err := gokogiri.ParseXml(bodyBytes)
	if err != nil {
		writeXmlRpcError(w, err)
		return
	}
	// NOTE(review): assumes Unpack copies what it needs out of the document;
	// the request value is not used after this function returns.
	defer requestDoc.Free()

	request := new(RssCloudRequest)
	err = request.Unpack(requestDoc)
	if err != nil {
		writeXmlRpcError(w, err)
		return
	}

	hostname := r.Header.Get("X-Forwarded-For")
	if hostname == "" {
		hostname = r.RemoteAddr
	} else {
		hostname = strings.TrimSpace(strings.SplitN(hostname, ",", 2)[0])
	}
	host, _, err := net.SplitHostPort(hostname)
	if err != nil {
		// The address carries no port (typical for X-Forwarded-For entries),
		// so the whole value is the host.
		host = hostname
	}
	request.Host = host

	if request.RequestMethodName != "cloud.notify" {
		writeXmlRpcError(w, fmt.Errorf("Unknown method %s", request.RequestMethodName))
		return
	}

	if !request.IsXmlRpc {
		writeXmlRpcError(w, fmt.Errorf("Only XML-RPC is supported"))
		return
	}
	// TODO: use https as appropriate here? er, does river2 support cloud endpoints on HTTPS?
	if request.FeedURL != fmt.Sprintf("http://%s/rss", r.Host) {
		writeXmlRpcError(w, fmt.Errorf("RSS URL %s is not a feed managed here", request.FeedURL))
		return
	}

	logr.Debugln("Yay, asked to call back to http://", request.Host, ":", request.Port, request.Path,
		"with method", request.MethodName, "!")

	// Build the callback URL; default ports are left out of the host part.
	callback := url.URL{Scheme: "http", Path: request.Path}
	if request.Port == 443 {
		callback.Scheme = "https"
	}
	if request.Port == 80 || request.Port == 443 {
		callback.Host = request.Host
	} else {
		callback.Host = net.JoinHostPort(request.Host, strconv.Itoa(int(request.Port)))
	}
	urlString := callback.String()

	// sql.ErrNoRows just means there is no subscription yet for this URL.
	subscription, err := RssCloudByURL(urlString)
	if err != nil && err != sql.ErrNoRows {
		logr.Errln("Error loading rsscloud for URL", urlString, ":", err.Error())
		http.Error(w, "error looking for rsscloud for URL "+urlString, http.StatusInternalServerError)
		return
	}
	if subscription == nil {
		subscription = NewRssCloud()
		subscription.URL = urlString
	}
	subscription.Method = request.MethodName
	// Subscribe until 25 hours from now.
	subscription.SubscribedUntil = time.Now().Add(25 * time.Hour).UTC()
	err = subscription.Save()
	if err != nil {
		logr.Errln("Error saving rsscloud for URL", urlString, ":", err.Error())
		http.Error(w, "error saving rsscloud for URL "+urlString, http.StatusInternalServerError)
		return
	}

	output := `<?xml version="1.0" encoding="UTF-8"?>
		<methodResponse>
			<params>
				<param>
					<value><boolean>1</boolean></value>
				</param>
			</params>
		</methodResponse>`

	w.Header().Set("Content-Type", "text/xml")
	w.Header().Set("Content-Length", strconv.Itoa(len(output)))
	w.Write([]byte(output))
}