Beispiel #1
0
// titleSeparation collects the headline text of every "./a/span" element
// found beneath each node in newDatas.
//
// For each matched span the content of its first nested "./span" child
// (presumably a timestamp - confirm against the scraped markup) is stripped
// from the start and end of the span's content. Spans without such a nested
// child are skipped, so the result can hold fewer entries than newDatas.
//
// The first search error aborts the extraction and is returned as is.
func titleSeparation(newDatas []xml.Node) ([]string, error) {
	var ret []string

	// Both expressions are loop-invariant, so compile them once.
	titlePath := xpath.Compile("./a/span")
	stampPath := xpath.Compile("./span")

	for _, newData := range newDatas {
		titles, err := newData.Search(titlePath)
		if err != nil {
			return nil, err
		}
		for _, title := range titles {
			stamps, err := title.Search(stampPath)
			if err != nil {
				return nil, err
			}
			if len(stamps) == 0 {
				continue
			}
			stamp := stamps[0].Content()
			// strings.Trim takes a *set of characters*, so it would also eat
			// unrelated leading/trailing headline characters that happen to
			// occur in the stamp. Remove the stamp as a whole substring.
			headline := strings.TrimSuffix(strings.TrimPrefix(title.Content(), stamp), stamp)
			ret = append(ret, headline)
		}
	}

	return ret, nil
}
Beispiel #2
0
// main parses xpath.xml, registers the "folia" namespace prefix and prints:
//   - the id of every folia:s element followed by the parent id and text of
//     each folia:w/folia:t beneath it,
//   - the result of searching for one specific sentence id,
//   - the parent of one specific word id.
//
// Any read, parse or search failure terminates the program via log.Fatal.
func main() {

	data, err := ioutil.ReadFile("xpath.xml")
	if err != nil {
		log.Fatal(err)
	}

	doc, err := xml.Parse(data, nil, nil, 0, xml.DefaultEncodingBytes)
	if err != nil {
		log.Fatal(err)
	}
	defer doc.Free()

	xp := doc.DocXPathCtx()
	xp.RegisterNamespace("folia", "http://ilk.uvt.nl/folia")

	// fmt.Print with an explicit blank line emits the same bytes as Println
	// with a trailing "\n", without tripping go vet's redundant-newline check.
	fmt.Print("\nAll sentences with all words:\n\n")

	xps := xpath.Compile("//folia:s")
	xpw := xpath.Compile("folia:w/folia:t")

	ss, err := doc.Root().Search(xps)
	if err != nil {
		log.Fatal(err)
	}
	for _, s := range ss {
		fmt.Println(s.Attr("id"))
		ww, err := s.Search(xpw)
		if err != nil {
			log.Fatal(err)
		}
		for _, w := range ww {
			fmt.Println("\t" + w.Parent().Attr("id") + "  \t" + w.Content())
		}
	}

	fmt.Print("\nSearch for specific sentence:\n\n")
	n, err := doc.Root().Search(`//folia:s[@xml:id="WR-P-E-E-0000000020.head.4.s.2"]`)
	if err != nil {
		log.Fatal(err)
	}
	fmt.Println(n)

	fmt.Print("\nSearch for sentence with specific word:\n\n")
	n, err = doc.Root().Search(`//folia:w[@xml:id="WR-P-E-E-0000000020.head.4.s.2.w.2"]`)
	if err != nil {
		log.Fatal(err)
	}
	// A query without matches yields no nodes rather than an error, so guard
	// the index instead of panicking on n[0].
	if len(n) == 0 {
		log.Fatal("no word with the requested xml:id found")
	}
	fmt.Println(n[0].Parent())

}
Beispiel #3
0
// main reads an HTML document from standard input, evaluates the XPath
// expression given as the first command-line argument against it and prints
// the content of every matching node, one per line.
//
// Exit status: 2 when the XPath argument is missing, 1 when the input cannot
// be read or parsed or the expression cannot be compiled. A failing search is
// reported on stderr only.
func main() {
	if len(os.Args) < 2 {
		fmt.Fprintln(os.Stderr, "Missing second argument, XPATH!")
		os.Exit(2)
	}
	xpathString := os.Args[1]

	page, err := ioutil.ReadAll(os.Stdin)
	if err != nil {
		fmt.Fprintln(os.Stderr, "Problem reading standard input.")
		os.Exit(1)
	}

	doc, err := gokogiri.ParseHtml(page)
	if err != nil {
		// Continuing would dereference a nil document in doc.Free below.
		fmt.Fprintln(os.Stderr, "Problem parsing document.")
		os.Exit(1)
	}
	defer doc.Free()

	xps := xpath.Compile(xpathString)
	if xps == nil {
		// Stop before Free/Search are called on a nil expression when the
		// user-supplied XPath did not compile.
		fmt.Fprintln(os.Stderr, "Problem compiling XPath expression.")
		doc.Free()
		os.Exit(1)
	}
	defer xps.Free()

	search, err := doc.Search(xps)
	if err != nil {
		fmt.Fprintln(os.Stderr, "Sorry. Got error.")
		return
	}
	for _, s := range search {
		fmt.Println(s.Content())
	}
}
Beispiel #4
0
// GetBlock loads the block for date and collects the appointments of
// practitioner found in it.
//
// Every status/pattern pair in paths is turned into an XPath by formatting
// the pattern with practitioner. Each matching element is decoded with
// parseAppDiv into a Unix timestamp and a block index, which are stored on the
// resulting Appointment together with the session, practitioner and status.
//
// The first load, search or decode error is returned with a nil Block.
func (s *Session) GetBlock(date time.Time, practitioner Practitioner) (Block, error) {
	root, err := s.loadBlock(date)
	if err != nil {
		return nil, err
	}

	var appointments Block
	for status, pattern := range paths {
		expr := xpath.Compile(fmt.Sprintf(pattern, practitioner))
		matches, err := root.Search(expr)
		if err != nil {
			return nil, err
		}

		for _, match := range matches {
			unixSeconds, index, err := parseAppDiv(match)
			if err != nil {
				return nil, err
			}

			appointment := Appointment{
				session:      s,
				Timestamp:    time.Unix(unixSeconds, 0),
				Practitioner: practitioner,
				Status:       status,
				blockIndex:   index,
			}
			appointments = append(appointments, appointment)
		}
	}
	return appointments, nil
}
Beispiel #5
0
// processOffJoins links offences to their defendants and verdicts using the
// <join result="criminalCharge"> elements of doc.
//
// The space-separated "targets" attribute of each join is scanned: an id
// containing "defend" is taken as the person, one containing "off" as the
// offence and one containing "verdict" as the verdict (the last match of each
// kind wins). The offence must exist, otherwise the method panics; a missing
// person or verdict is silently ignored.
func (record *Record) processOffJoins(doc *html.HtmlDocument) {
	ctx := xpath.NewXPath(doc.DocPtr())
	rootPtr := doc.Root().NodePtr()
	joinExpr := xpath.Compile("//join[@result='criminalCharge']")

	for _, ptr := range ctx.Evaluate(rootPtr, joinExpr) {
		join := xml.NewNode(ptr, doc)

		var defendantRef, offenceRef, verdictRef string
		for _, target := range strings.Split(join.Attr("targets"), " ") {
			// The checks are deliberately independent: one id may satisfy
			// more than one of them.
			if strings.Contains(target, "defend") {
				defendantRef = target
			}
			if strings.Contains(target, "off") {
				offenceRef = target
			}
			if strings.Contains(target, "verdict") {
				verdictRef = target
			}
		}

		offence := record.findOffence(offenceRef)
		if offence == nil {
			panic("couldn't find offence " + offenceRef)
		}
		if person := record.findPerson(defendantRef); person != nil {
			offence.Defendants = append(offence.Defendants, person)
		}
		if verdict := record.findVerdict(verdictRef); verdict != nil {
			offence.Verdict = verdict
		}
	}
}
Beispiel #6
0
// NewRecord parses content as HTML and builds a Record from it.
//
// The record keeps the raw content, a cleaned-up rendering of the parsed
// document, the date from the "date" interp (nil when it does not parse as
// YYYYMMDD) and the id/type attributes of the first <div1> element. Persons,
// offences, verdicts and the offence joins are then extracted in that order.
//
// NewRecord panics when the content cannot be parsed or contains no <div1>
// element.
func NewRecord(content []byte) (record *Record) {
	doc, err := gokogiri.ParseHtml(content)
	if err != nil {
		panic(err)
	}

	displayText := cleanUpContent(doc.String())
	record = &Record{RawText: content, DisplayText: displayText}

	// An unparsable date leaves record.Date at its nil zero value.
	dateStr := getInterp(doc.Root().NodePtr(), "date", doc)
	if date, err := time.Parse("20060102", dateStr); err == nil {
		record.Date = &date
	}

	xPath := xpath.NewXPath(doc.DocPtr())
	nodePtrs := xPath.Evaluate(doc.Root().NodePtr(),
		xpath.Compile("//div1"))
	// Fail with a descriptive message instead of an index-out-of-range
	// runtime panic when the document has no <div1>.
	if len(nodePtrs) == 0 {
		panic("couldn't find div1 element")
	}

	node := xml.NewNode(nodePtrs[0], doc)
	record.Id = node.Attr("id")
	record.Type = node.Attr("type")

	record.processPersons(doc)
	record.processOffences(doc)
	record.processVerdicts(doc)
	record.processOffJoins(doc)
	return
}
Beispiel #7
0
// SearchWithVariables works like the Search function, but additionally takes
// a VariableScope that is used to resolve variable names or registered
// function references in the XPath being evaluated.
//
// data may be an XPath given as string or []byte, or an already compiled
// *xpath.Expression. Any other type yields ERR_UNDEFINED_SEARCH_PARAM, and a
// string that does not compile yields a "cannot compile xpath" error.
func (xmlNode *XmlNode) SearchWithVariables(data interface{}, v xpath.VariableScope) (result []Node, err error) {
	switch query := data.(type) {
	case *xpath.Expression:
		ctx := xmlNode.Document.DocXPathCtx()
		ctx.SetResolver(v)
		ptrs, evalErr := ctx.EvaluateAsNodeset(unsafe.Pointer(xmlNode.Ptr), query)
		if evalErr != nil || ptrs == nil {
			return nil, evalErr
		}
		for _, ptr := range ptrs {
			result = append(result, NewNode(ptr, xmlNode.Document))
		}
	case []byte:
		result, err = xmlNode.SearchWithVariables(string(query), v)
	case string:
		compiled := xpath.Compile(query)
		if compiled == nil {
			err = errors.New("cannot compile xpath: " + query)
			break
		}
		// The expression is owned by this call; release it on return.
		defer compiled.Free()
		result, err = xmlNode.SearchWithVariables(compiled, v)
	default:
		err = ERR_UNDEFINED_SEARCH_PARAM
	}
	return
}
Beispiel #8
0
// FiveThousandBest downloads http://5000best.com/movies/1 and returns the
// inner HTML of every <a class="n"> element with its last 8 bytes removed.
// What those 8 bytes contain is not visible here; presumably a fixed-width
// suffix - confirm against the page markup.
//
// An entry shorter than 8 bytes is returned untouched instead of causing a
// slice-bounds panic. Download, read and parse errors are returned with a nil
// slice.
func FiveThousandBest() (titles []string, err error) {
	// Number of trailing bytes dropped from each anchor's inner HTML.
	const suffixLen = 8

	res, err := http.Get("http://5000best.com/movies/1")
	if err != nil {
		return nil, err
	}
	// Close the body on every path so the connection is released.
	defer res.Body.Close()

	body, err := ioutil.ReadAll(res.Body)
	if err != nil {
		return nil, err
	}

	doc, err := gokogiri.ParseHtml(body)
	if err != nil {
		return nil, err
	}
	// Only Go strings leave this function, so the document can be released
	// once the titles have been extracted.
	defer doc.Free()

	exp := xpath.Compile("//a[@class='n']")
	nodes := doc.XPathCtx.Evaluate(doc.NodePtr(), exp)

	for _, np := range nodes {
		title := xml.NewNode(np, doc).InnerHtml()
		if len(title) >= suffixLen {
			title = title[:len(title)-suffixLen]
		}
		titles = append(titles, title)
	}

	return titles, nil
}
Beispiel #9
0
// getNewsInfo returns the <li> elements reached through the fixed chain of
// nested <div> elements below <body>, searched from the root of doc.
// A search error is returned with a nil slice.
func getNewsInfo(doc *html.HtmlDocument) ([]xml.Node, error) {
	listItems := xpath.Compile("//body/div/div/div/div/div/div/div/div/div/div/div/div/div/div/ul/li")

	items, err := doc.Root().Search(listItems)
	if err == nil {
		return items, nil
	}
	return nil, err
}
Beispiel #10
0
// getInterp returns the "value" attribute of the <interp> element with the
// given type found below basePtr. The value is only returned when exactly one
// such element exists; otherwise the result is the empty string.
func getInterp(basePtr unsafe.Pointer, interpType string, doc *html.HtmlDocument) (value string) {
	ctx := xpath.NewXPath(doc.DocPtr())
	query := ".//interp[@type='" + interpType + "']"

	matches := ctx.Evaluate(basePtr, xpath.Compile(query))
	if len(matches) != 1 {
		return ""
	}
	return xml.NewNode(matches[0], doc).Attr("value")
}
Beispiel #11
0
// urlSeparation returns, for each node in newDatas, the content of the first
// "./a/@href" match beneath it.
//
// The result always has one entry per input node: a node without a matching
// href contributes an empty string rather than triggering an index-out-of-
// range panic. The first search error aborts the extraction and is returned
// with a nil slice.
func urlSeparation(newDatas []xml.Node) ([]string, error) {
	var ret []string

	hrefPath := xpath.Compile("./a/@href")

	for _, newData := range newDatas {
		urls, err := newData.Search(hrefPath)
		if err != nil {
			return nil, err
		}
		if len(urls) == 0 {
			// Keep positions aligned with newDatas.
			ret = append(ret, "")
			continue
		}
		ret = append(ret, urls[0].Content())
	}
	return ret, nil
}
Beispiel #12
0
// main parses the package-level XML source a, registers the prefix "ns" for
// the namespace http://example.com/this and prints the index and content of
// every /ns:NodeA/ns:NodeB element.
//
// Parse and search errors are printed and end the program.
func main() {
	doc, err := gokogiri.ParseXml([]byte(a))
	if err != nil {
		// Without this check a failed parse would be dereferenced by the
		// deferred Free and by DocXPathCtx below.
		fmt.Println(err)
		return
	}
	defer doc.Free()

	xp := doc.DocXPathCtx()
	xp.RegisterNamespace("ns", "http://example.com/this")

	x := xpath.Compile("/ns:NodeA/ns:NodeB")
	groups, err := doc.Search(x)
	if err != nil {
		fmt.Println(err)
		return
	}
	for i, group := range groups {
		fmt.Println(i, group.Content())
	}
}
Beispiel #13
0
// parseHtml extracts the adverts listed under the element with id
// "results-list" from page.
//
// For each result <div> the title link and the price block are looked up.
// Results for which either lookup fails or finds nothing are skipped. The
// price is the price block's inner HTML with surrounding whitespace and any
// leading/trailing "<br>" tags removed; the URL is the title link's href
// attribute.
//
// The returned slice is never nil on success. Parse and top-level search
// errors are returned with a nil pointer.
func parseHtml(page []byte) (*[]DubizzleResult, error) {
	// parse the web page
	doc, err := gokogiri.ParseHtml(page)
	if err != nil {
		return nil, err
	}
	// Deferred so the document is also released on the early error return.
	defer doc.Free()
	println("parsed the doc: \n")

	// perform operations on the parsed page
	xp := xpath.Compile("//*[@id='results-list']/div")
	resultList, err := doc.Root().Search(xp)
	if err != nil {
		return nil, err
	}

	// Loop-invariant expressions: compile once instead of once per result.
	xpTitle := xpath.Compile(".//h3[@id='title']/span[@class='title']/a")
	xpPrice := xpath.Compile(".//div[@class='price']")

	ads := []DubizzleResult{}
	for _, rslt := range resultList {
		titleInfo, titleErr := rslt.Search(xpTitle)
		priceInfo, priceErr := rslt.Search(xpPrice)
		// Both lookups must succeed; the original "||" accepted a result
		// when only one of them did.
		if titleErr != nil || priceErr != nil {
			continue
		}
		if len(titleInfo) == 0 || len(priceInfo) == 0 {
			continue
		}

		title := titleInfo[0].InnerHtml()

		// strings.Trim(price, "<br>") would strip any of the characters
		// '<', 'b', 'r', '>' from both ends, not the tag. Remove whole tags.
		price := strings.TrimSpace(priceInfo[0].InnerHtml())
		for strings.HasPrefix(price, "<br>") {
			price = strings.TrimSpace(strings.TrimPrefix(price, "<br>"))
		}
		for strings.HasSuffix(price, "<br>") {
			price = strings.TrimSpace(strings.TrimSuffix(price, "<br>"))
		}

		url := titleInfo[0].Attribute("href").String()
		ads = append(ads, DubizzleResult{Title: title, Price: price, Url: url})
	}
	return &ads, nil
}
Beispiel #14
0
// processVerdicts fills record.Verdicts with one Verdict per
// <rs type="verdictDescription"> element in doc. Each verdict takes its id
// from the element's "id" attribute, its description from the cleaned-up
// element content and its type from the "verdictCategory" interp below it.
func (record *Record) processVerdicts(doc *html.HtmlDocument) {
	ctx := xpath.NewXPath(doc.DocPtr())
	rootPtr := doc.Root().NodePtr()
	ptrs := ctx.Evaluate(rootPtr, xpath.Compile("//rs[@type='verdictDescription']"))

	found := make([]Verdict, len(ptrs))
	for idx, ptr := range ptrs {
		element := xml.NewNode(ptr, doc)

		// Populate the slot in place rather than via a temporary copy.
		current := &found[idx]
		current.Id = element.Attr("id")
		current.Desc = cleanUpContent(element.Content())
		current.SetType(getInterp(ptr, "verdictCategory", doc))
	}
	record.Verdicts = found
}
Beispiel #15
0
// photoSeparation reports, for each node in newDatas, whether it contains at
// least one "./a/span" element with class "icon-photo". The result has one
// entry per input node; the first search error is returned with a nil slice.
func photoSeparation(newDatas []xml.Node) ([]bool, error) {
	iconPath := xpath.Compile("./a/span[@class='icon-photo']")

	var flags []bool
	for _, item := range newDatas {
		icons, err := item.Search(iconPath)
		if err != nil {
			return nil, err
		}
		flags = append(flags, len(icons) != 0)
	}
	return flags, nil
}
Beispiel #16
0
// processOffences fills record.Offences with one Offence per
// <rs type="offenceDescription"> element in doc. Id comes from the element's
// "id" attribute, Category and SubCategory from the "offenceCategory" and
// "offenceSubcategory" interps below it, and Desc from the cleaned-up
// element content.
func (record *Record) processOffences(doc *html.HtmlDocument) {
	ctx := xpath.NewXPath(doc.DocPtr())
	rootPtr := doc.Root().NodePtr()
	ptrs := ctx.Evaluate(rootPtr, xpath.Compile("//rs[@type='offenceDescription']"))

	collected := make([]Offence, len(ptrs))
	for idx, ptr := range ptrs {
		element := xml.NewNode(ptr, doc)
		collected[idx] = Offence{
			Id:          element.Attr("id"),
			Category:    getInterp(ptr, "offenceCategory", doc),
			SubCategory: getInterp(ptr, "offenceSubcategory", doc),
			Desc:        cleanUpContent(element.Content()),
		}
	}
	record.Offences = collected
}
Beispiel #17
0
// GetData evaluates every XPath in xpathList against body and, for each
// matching node, runs ppDownloader.GetData on the node's inner HTML with
// xpathChild.
//
// The result is keyed by the decimal index of the match within its own
// result set, so matches of different expressions that share an index
// overwrite each other (behaviour kept from the original).
//
// The function is best effort and never fails: an unparsable body yields an
// empty map, and entries of xpathList that are not strings, do not compile or
// cannot be evaluated are skipped.
func GetData(body []byte, xpathList map[string]interface{}, xpathChild map[string]interface{}) (result map[string]interface{}) {
	result = map[string]interface{}{}

	doc, err := gokogiri.ParseHtml(body)
	if err != nil {
		// Return before the deferred Free is registered on a failed parse.
		return
	}
	defer doc.Free()

	for _, v := range xpathList {
		expr, ok := v.(string)
		if !ok {
			// An unchecked assertion would panic on non-string values.
			continue
		}
		xps := xpath.Compile(expr)
		if xps == nil {
			continue
		}
		ss, err := doc.Root().Search(xps)
		if err != nil {
			continue
		}
		for k, s := range ss {
			result[strconv.Itoa(k)] = ppDownloader.GetData([]byte(s.InnerHtml()), xpathChild)
		}
	}

	return
}
Beispiel #18
0
// processPersons fills record.Persons with one Person per <persname> element
// in doc. The id and type come from the element's "id" and "type"
// attributes; given name, surname and gender come from the "given",
// "surname" and "gender" interps below the element.
func (record *Record) processPersons(doc *html.HtmlDocument) {
	ctx := xpath.NewXPath(doc.DocPtr())
	rootPtr := doc.Root().NodePtr()
	ptrs := ctx.Evaluate(rootPtr, xpath.Compile("//persname"))

	people := make([]Person, len(ptrs))
	for idx, ptr := range ptrs {
		element := xml.NewNode(ptr, doc)

		// Populate the slot in place rather than via a temporary copy.
		current := &people[idx]
		current.Id = element.Attr("id")
		current.GivenName = getInterp(ptr, "given", doc)
		current.Surname = getInterp(ptr, "surname", doc)
		current.SetType(element.Attr("type"))
		current.SetGender(getInterp(ptr, "gender", doc))
	}
	record.Persons = people
}
Beispiel #19
0
// GetResultByXpath parses body as HTML and returns the inner HTML of the
// first node matching the XPath x. When nothing matches, the empty string is
// returned with a nil error. Parse and search errors are returned with an
// empty string.
func (t *TvFourFiveSixSeven) GetResultByXpath(body []byte, x string) (list string, err error) {
	doc, err := gokogiri.ParseHtml(body)
	if err != nil {
		return "", err
	}
	defer doc.Free()

	expr := xpath.Compile(x)
	matches, err := doc.Root().Search(expr)
	if err != nil {
		return "", err
	}

	// Only the first match is of interest.
	if len(matches) == 0 {
		return "", nil
	}
	return matches[0].InnerHtml(), nil
}
Beispiel #20
0
// getEposNodes locates the child of the document root whose local name is
// "Body" and returns all child elements of that Body's first child.
//
// A "bad data" error is returned when no Body element exists or when it has
// no children; search errors are passed through.
func getEposNodes(doc *xml.XmlDocument) (retnodes []xml.Node, err error) {
	// Match on the local name so that any namespace prefix is accepted.
	bodyPath := xpath.Compile("*[local-name()='Body']")

	bodies, err := doc.Root().Search(bodyPath)
	if err != nil {
		return nil, err
	}

	// The payload must be present before it can be descended into.
	if len(bodies) == 0 || bodies[0].CountChildren() < 1 {
		return nil, errors.New("bad data")
	}

	payload := bodies[0].FirstChild()
	return payload.Search("./*")
}
// GetTemperature downloads the JMA AMeDAS page for station 44132 and returns
// the inner HTML of the last cell in the second column of the "tbl_list"
// table whose inner HTML is longer than two bytes.
//
// The function has no error return: any download, read, parse or search
// failure yields the empty string instead of a nil-pointer panic.
func GetTemperature() string {
	resp, err := http.Get("http://www.jma.go.jp/jp/amedas_h/today-44132.html?areaCode=000&groupCode=30")
	if err != nil {
		return ""
	}
	// Close the body on every path so the connection is released.
	defer resp.Body.Close()

	page, err := ioutil.ReadAll(resp.Body)
	if err != nil {
		return ""
	}

	doc, err := gokogiri.ParseHtml(page)
	if err != nil {
		return ""
	}
	defer doc.Free()

	xps := xpath.Compile("//*[@id=\"tbl_list\"]/tr/td[2]")
	ss, err := doc.Root().Search(xps)
	if err != nil {
		return ""
	}

	var temperature string
	for _, s := range ss {
		// Later rows overwrite earlier ones, so the last qualifying cell wins.
		if cell := s.InnerHtml(); len(cell) > 2 {
			temperature = cell
		}
	}

	return temperature
}
Beispiel #22
0
// GetHumidity downloads the JMA AMeDAS page for station 44132 and returns the
// inner HTML of the last cell in the seventh column of the "tbl_list" table
// whose inner HTML parses as an integer.
//
// The function has no error return: any download, read, parse or search
// failure yields the empty string instead of a nil-pointer panic.
func GetHumidity() string {
	resp, err := http.Get("http://www.jma.go.jp/jp/amedas_h/today-44132.html?areaCode=000&groupCode=30")
	if err != nil {
		return ""
	}
	// Close the body on every path so the connection is released.
	defer resp.Body.Close()

	page, err := ioutil.ReadAll(resp.Body)
	if err != nil {
		return ""
	}

	doc, err := gokogiri.ParseHtml(page)
	if err != nil {
		return ""
	}
	defer doc.Free()

	xps := xpath.Compile("//*[@id=\"tbl_list\"]/tr/td[7]")
	ss, err := doc.Root().Search(xps)
	if err != nil {
		return ""
	}

	var humidity string
	for _, s := range ss {
		// Later rows overwrite earlier ones, so the last numeric cell wins.
		cell := s.InnerHtml()
		if _, err := strconv.Atoi(cell); err == nil {
			humidity = cell
		}
	}

	return humidity
}
Beispiel #23
0
// EvalXPath evaluates an XPath against this node, resolving variables and
// registered functions through v.
//
// data may be an XPath given as string or []byte, or an already compiled
// *xpath.Expression; any other type yields ERR_UNDEFINED_SEARCH_PARAM.
//
// The dynamic type of the result follows the XPath return type: node sets
// (and XSLT trees) are returned as []Node, numbers and booleans as the values
// produced by ResultAsNumber and ResultAsBoolean.
// In any other cases, the result will be coerced to a string.
func (xmlNode *XmlNode) EvalXPath(data interface{}, v xpath.VariableScope) (result interface{}, err error) {
	switch query := data.(type) {
	case *xpath.Expression:
		ctx := xmlNode.Document.DocXPathCtx()
		ctx.SetResolver(v)
		if evalErr := ctx.Evaluate(unsafe.Pointer(xmlNode.Ptr), query); evalErr != nil {
			return nil, evalErr
		}

		switch ctx.ReturnType() {
		case xpath.XPATH_NODESET, xpath.XPATH_XSLT_TREE:
			ptrs, setErr := ctx.ResultAsNodeset()
			if setErr != nil {
				return nil, setErr
			}
			var nodes []Node
			for _, ptr := range ptrs {
				nodes = append(nodes, NewNode(ptr, xmlNode.Document))
			}
			result = nodes
		case xpath.XPATH_BOOLEAN:
			result, _ = ctx.ResultAsBoolean()
		case xpath.XPATH_NUMBER:
			result, _ = ctx.ResultAsNumber()
		default:
			result, _ = ctx.ResultAsString()
		}
	case []byte:
		result, err = xmlNode.EvalXPath(string(query), v)
	case string:
		compiled := xpath.Compile(query)
		if compiled == nil {
			err = errors.New("cannot compile xpath: " + query)
			break
		}
		// The expression is owned by this call; release it on return.
		defer compiled.Free()
		result, err = xmlNode.EvalXPath(compiled, v)
	default:
		err = ERR_UNDEFINED_SEARCH_PARAM
	}
	return
}
Beispiel #24
0
// ProcessField extracts this field's value from document d.
//
// The field's Selector is evaluated from the document root and only the first
// match is used. Without a match (search errors are ignored) the empty string
// is returned. Otherwise the result is whatever Callback returns for the
// node, or the node's content when no Callback is set.
func (f *Field) ProcessField(d *html.HtmlDocument) interface{} {
	expr := xpath.Compile(f.Selector)
	matches, _ := d.Root().Search(expr)
	if len(matches) == 0 {
		return ""
	}

	first := matches[0]
	if f.Callback == nil {
		return first.Content()
	}
	return f.Callback(&first)
}
Beispiel #25
0
// GetSummary fetches the page at GenerateJaWikipediaURL(query) and returns
// the concatenated text content of the nodes matching the first <p> under
// the element with id "mw-content-text".
//
// A "page not found" error is returned for any status other than 200.
// Request, read, parse and search failures are returned as errors instead of
// being ignored (a failed request previously caused a nil-pointer panic on
// resp.StatusCode).
func GetSummary(query string) (string, error) {
	resp, err := http.Get(GenerateJaWikipediaURL(query))
	if err != nil {
		return "", err
	}
	// Close the body on every path so the connection is released.
	defer resp.Body.Close()

	if resp.StatusCode != http.StatusOK {
		return "", errors.New("page not found")
	}

	page, err := ioutil.ReadAll(resp.Body)
	if err != nil {
		return "", err
	}

	doc, err := gokogiri.ParseHtml(page)
	if err != nil {
		return "", err
	}
	defer doc.Free()

	xps := xpath.Compile("//*[@id=\"mw-content-text\"]/p[1]")
	ss, err := doc.Root().Search(xps)
	if err != nil {
		return "", err
	}

	content := ""
	for _, s := range ss {
		content += s.Content()
	}

	return content, nil
}
Beispiel #26
0
// GetUrlsByXpath parses body as HTML, finds every node matching the XPath x
// and returns the href attribute of each "./li/a" link below those nodes, in
// document order of the matches.
//
// Parse errors and errors of the outer search are returned with a nil slice;
// errors of the inner link search are ignored.
func (t *TvFourFiveSixSeven) GetUrlsByXpath(body []byte, x string) (list []string, err error) {
	doc, err := gokogiri.ParseHtml(body)
	if err != nil {
		return nil, err
	}
	defer doc.Free()

	expr := xpath.Compile(x)
	containers, err := doc.Root().Search(expr)
	if err != nil {
		return nil, err
	}

	for _, container := range containers {
		links, _ := container.Search("./li/a")
		for _, link := range links {
			list = append(list, link.Attr("href"))
		}
	}
	return list, nil
}
Beispiel #27
0
// GetData evaluates every XPath in dxpath against body and stores one value
// per key in the result; when an expression matches several nodes the last
// match wins.
//
// Key handling:
//   - "author":  the node's "title" attribute, stored under "author"
//   - "wx_time": the node's "t" attribute, stored under "time"
//   - otherwise: the node's inner HTML, stored under the key itself
//
// The function is best effort and never fails: an unparsable body yields an
// empty map, and entries of dxpath that are not strings, do not compile or
// cannot be evaluated are skipped.
func GetData(body []byte, dxpath map[string]interface{}) (result map[string]interface{}) {
	result = map[string]interface{}{}

	doc, err := gokogiri.ParseHtml(body)
	if err != nil {
		// Return before the deferred Free is registered on a failed parse.
		return
	}
	defer doc.Free()

	for mapKey, v := range dxpath {
		expr, ok := v.(string)
		if !ok {
			// An unchecked assertion would panic on non-string values.
			continue
		}
		xps := xpath.Compile(expr)
		if xps == nil {
			continue
		}
		ss, err := doc.Root().Search(xps)
		if err != nil {
			continue
		}

		for _, s := range ss {
			switch mapKey {
			case "author":
				result[mapKey] = s.Attr("title")
			case "wx_time":
				result["time"] = s.Attr("t")
			default:
				result[mapKey] = s.InnerHtml()
			}
		}
	}
	return
}
Beispiel #28
0
// parse parses data as XML, registers every namespace given through the
// "xmlns" flag (each in the form prefix:uri), evaluates the expression from
// the "xpath" flag against the document root and prints the content of each
// matching node on its own line. With the "verbose" flag set, every
// registered namespace is echoed first.
//
// Errors are handed to util.CheckErr. Its behaviour is not visible here
// (presumably it aborts on a non-nil error - confirm), so every failure path
// also stops locally instead of relying on it.
func parse(c *cli.Context, data []byte) {
	doc, err := xml.Parse(data, nil, nil, 0, xml.DefaultEncodingBytes)
	util.CheckErr(err)
	defer doc.Free()

	xp := doc.DocXPathCtx()
	verbose := c.Bool("verbose")
	for _, xmlns := range c.StringSlice("xmlns") {
		ns := strings.SplitN(xmlns, ":", 2)
		if len(ns) != 2 {
			// Without a ':' there is no ns[1]; report instead of panicking.
			util.CheckErr(fmt.Errorf("invalid xmlns %q: expected prefix:uri", xmlns))
			continue
		}
		if verbose {
			fmt.Println("NS " + ns[0] + "==" + ns[1])
		}
		xp.RegisterNamespace(ns[0], ns[1])
	}

	expr := c.String("xpath")
	xps := xpath.Compile(expr)
	if xps == nil {
		// Do not hand a nil expression to Search when the user-supplied
		// XPath did not compile.
		util.CheckErr(fmt.Errorf("cannot compile xpath: %s", expr))
		return
	}

	nodes, err := doc.Root().Search(xps)
	util.CheckErr(err)
	for _, node := range nodes {
		fmt.Println(node.Content())
	}
}
// xpathSelector compiles every XPath in xs once and returns a Selector that
// evaluates them, in order, against a document.
//
// Each evaluation result is folded into a running value through apply, which
// receives the textual form of the result and the value accumulated so far:
// node sets contribute the content of every node, numbers are formatted with
// ten decimal places, booleans become "true"/"false" and strings are passed
// through. Results of any other type are ignored. The first evaluation error
// aborts the selector and is returned with a nil value.
func xpathSelector(xs []string, apply xpathSelectorApply) Selector {
	compiled := make([]*xpath.Expression, 0, len(xs))
	for _, x := range xs {
		compiled = append(compiled, xpath.Compile(x))
	}

	return func(url string, doc *html.HtmlDocument) (interface{}, error) {
		var acc interface{}
		for _, expr := range compiled {
			evaluated, err := doc.EvalXPath(expr, nil)
			if err != nil {
				return nil, err
			}

			switch match := evaluated.(type) {
			case []xml.Node:
				for _, node := range match {
					acc = apply(node.Content(), acc)
				}
			case float64:
				acc = apply(strconv.FormatFloat(match, 'f', 10, 64), acc)
			case bool:
				text := "false"
				if match {
					text = "true"
				}
				acc = apply(text, acc)
			case string:
				acc = apply(match, acc)
			}
		}

		return acc, nil
	}
}
Beispiel #30
0
// EvalXPathAsBoolean evaluates an XPath against this node and returns the
// result as a boolean.
//
// data may be an XPath given as string or []byte, or an already compiled
// *xpath.Expression. The method has no error return: an unsupported data
// type, a string that does not compile and a failed evaluation all yield
// false.
//
// If a non-nil VariableScope is provided, any variables or registered functions present
// in the xpath will be resolved.
func (xmlNode *XmlNode) EvalXPathAsBoolean(data interface{}, v xpath.VariableScope) (result bool) {
	switch query := data.(type) {
	case *xpath.Expression:
		ctx := xmlNode.Document.DocXPathCtx()
		ctx.SetResolver(v)
		if err := ctx.Evaluate(unsafe.Pointer(xmlNode.Ptr), query); err != nil {
			return false
		}
		result, _ = ctx.ResultAsBoolean()
	case []byte:
		result = xmlNode.EvalXPathAsBoolean(string(query), v)
	case string:
		// A string that does not compile leaves result at false.
		if compiled := xpath.Compile(query); compiled != nil {
			defer compiled.Free()
			result = xmlNode.EvalXPathAsBoolean(compiled, v)
		}
	}
	return
}