Beispiel #1
0
// processOffJoins links each offence on the record to its defendants and
// verdict, using the <join result="criminalCharge"> elements found in doc.
//
// Every join carries a whitespace-separated "targets" attribute. A target id
// containing "defend" is used as the person id, one containing "off" as the
// offence id, and one containing "verdict" as the verdict id. If several
// targets of the same kind are listed, the last one wins.
//
// It panics when record.findOffence returns nil for the offence id. A person
// or verdict that cannot be found is skipped.
func (record *Record) processOffJoins(doc *html.HtmlDocument) {
	xPath := xpath.NewXPath(doc.DocPtr())
	joinPtrs := xPath.Evaluate(doc.Root().NodePtr(),
		xpath.Compile("//join[@result='criminalCharge']"))

	for _, nodePtr := range joinPtrs {
		node := xml.NewNode(nodePtr, doc)

		// strings.Fields splits on any run of whitespace, so ids separated by
		// tabs, newlines or repeated spaces are still seen as separate tokens
		// (splitting on a single " " would glue such ids together).
		var personID, offID, verdictID string
		for _, target := range strings.Fields(node.Attr("targets")) {
			// The checks are deliberately independent rather than a switch:
			// a single id may match more than one keyword.
			if strings.Contains(target, "defend") {
				personID = target
			}
			if strings.Contains(target, "off") {
				offID = target
			}
			if strings.Contains(target, "verdict") {
				verdictID = target
			}
		}

		offence := record.findOffence(offID)
		if offence == nil {
			panic("couldn't find offence " + offID)
		}
		if person := record.findPerson(personID); person != nil {
			offence.Defendants = append(offence.Defendants, person)
		}
		if verdict := record.findVerdict(verdictID); verdict != nil {
			offence.Verdict = verdict
		}
	}
}
Beispiel #2
0
// getNewsInfo returns the <li> nodes matched by a fixed, deeply nested XPath
// under the document's <body>. On a search error it returns nil and the error.
func getNewsInfo(doc *html.HtmlDocument) ([]xml.Node, error) {
	const listItemPath = "//body/div/div/div/div/div/div/div/div/div/div/div/div/div/div/ul/li"

	items, err := doc.Root().Search(xpath.Compile(listItemPath))
	if err != nil {
		return nil, err
	}
	return items, nil
}
Beispiel #3
0
// processVerdicts builds one Verdict per <rs type="verdictDescription">
// element in doc and stores the resulting slice in record.Verdicts.
func (record *Record) processVerdicts(doc *html.HtmlDocument) {
	evaluator := xpath.NewXPath(doc.DocPtr())
	rootPtr := doc.Root().NodePtr()
	expr := xpath.Compile("//rs[@type='verdictDescription']")
	ptrs := evaluator.Evaluate(rootPtr, expr)

	result := make([]Verdict, len(ptrs))
	for idx := range ptrs {
		elem := xml.NewNode(ptrs[idx], doc)

		// Fill the zero-valued slice element in place.
		entry := &result[idx]
		entry.Id = elem.Attr("id")
		entry.Desc = cleanUpContent(elem.Content())
		entry.SetType(getInterp(ptrs[idx], "verdictCategory", doc))
	}
	record.Verdicts = result
}
Beispiel #4
0
// processOffences builds one Offence per <rs type="offenceDescription">
// element in doc and stores the resulting slice in record.Offences.
func (record *Record) processOffences(doc *html.HtmlDocument) {
	evaluator := xpath.NewXPath(doc.DocPtr())
	rootPtr := doc.Root().NodePtr()
	expr := xpath.Compile("//rs[@type='offenceDescription']")
	ptrs := evaluator.Evaluate(rootPtr, expr)

	result := make([]Offence, len(ptrs))
	for idx, ptr := range ptrs {
		elem := xml.NewNode(ptr, doc)
		// Field order mirrors the order in which the values are extracted.
		result[idx] = Offence{
			Id:          elem.Attr("id"),
			Category:    getInterp(ptr, "offenceCategory", doc),
			SubCategory: getInterp(ptr, "offenceSubcategory", doc),
			Desc:        cleanUpContent(elem.Content()),
		}
	}
	record.Offences = result
}
Beispiel #5
0
// docSearch runs the given xpath query against the document root and returns
// the matching nodes.
//
// When the search fails or matches nothing, the outcome depends on mustFind:
// if it is false, nil is returned; if it is true, a diagnostic naming
// elementName and pageName is written to stderr and the process exits with
// status 1.
func docSearch(doc *ghtml.HtmlDocument, elementName string, pageName string, xpath string, mustFind bool) []gxml.Node {
	matches, err := doc.Root().Search(xpath)

	// Happy path: the search succeeded and found at least one node.
	if err == nil && len(matches) > 0 {
		return matches
	}

	// Nothing usable was found; only fatal when the caller insists.
	if !mustFind {
		return nil
	}

	if err != nil {
		fmt.Fprintf(os.Stderr, "Error locating element \"%s\" in page %s (incorrect xpath?): %v\n", elementName, pageName, err)
	} else {
		fmt.Fprintf(os.Stderr, "Error locating element \"%s\" in page %s (incorrect xpath?): len() == 0\n", elementName, pageName)
	}
	os.Exit(1)

	// Not reached; os.Exit does not return, but the compiler needs a return.
	return matches
}
Beispiel #6
0
// processPersons builds one Person per <persname> element in doc and stores
// the resulting slice in record.Persons.
func (record *Record) processPersons(doc *html.HtmlDocument) {
	evaluator := xpath.NewXPath(doc.DocPtr())
	rootPtr := doc.Root().NodePtr()
	expr := xpath.Compile("//persname")
	ptrs := evaluator.Evaluate(rootPtr, expr)

	result := make([]Person, 0, len(ptrs))
	for _, ptr := range ptrs {
		elem := xml.NewNode(ptr, doc)

		var p Person
		p.Id = elem.Attr("id")
		p.GivenName = getInterp(ptr, "given", doc)
		p.Surname = getInterp(ptr, "surname", doc)
		p.SetType(elem.Attr("type"))
		p.SetGender(getInterp(ptr, "gender", doc))

		result = append(result, p)
	}
	record.Persons = result
}
Beispiel #7
0
// ProcessField evaluates the field's XPath selector against document d and
// returns the extracted value.
//
// Only the first matching node is used. If f.Callback is set, a pointer to
// that node is passed to it and its result is returned; otherwise the node's
// content is returned. When the search fails or matches nothing, the empty
// string is returned.
func (f *Field) ProcessField(d *html.HtmlDocument) interface{} {
	selector := xpath.Compile(f.Selector)
	result, err := d.Root().Search(selector)
	if err != nil || len(result) == 0 {
		// Best effort: a failed search is reported to the caller the same
		// way as "no match", since the signature has no error return.
		return ""
	}

	node := result[0]
	if f.Callback != nil {
		return f.Callback(&node)
	}
	return node.Content()
}