func (record *Record) processOffJoins(doc *html.HtmlDocument) { xPath := xpath.NewXPath(doc.DocPtr()) // join the offence with the defendants and verdict joinPtrs := xPath.Evaluate(doc.Root().NodePtr(), xpath.Compile("//join[@result='criminalCharge']")) for _, nodePtr := range joinPtrs { node := xml.NewNode(nodePtr, doc) targets := strings.Split(node.Attr("targets"), " ") var personId, offId, verdictId string for _, targetId := range targets { if strings.Contains(targetId, "defend") { personId = targetId } if strings.Contains(targetId, "off") { offId = targetId } if strings.Contains(targetId, "verdict") { verdictId = targetId } } offence := record.findOffence(offId) if offence == nil { panic("couldn't find offence " + offId) } person := record.findPerson(personId) if person != nil { offence.Defendants = append(offence.Defendants, person) } verdict := record.findVerdict(verdictId) if verdict != nil { offence.Verdict = verdict } } }
func getNewsInfo(doc *html.HtmlDocument) ([]xml.Node, error) { xp := "//body/div/div/div/div/div/div/div/div/div/div/div/div/div/div/ul/li" xps := xpath.Compile(xp) newDatas, err := doc.Root().Search(xps) if err != nil { return nil, err } return newDatas, nil }
func (record *Record) processVerdicts(doc *html.HtmlDocument) { xPath := xpath.NewXPath(doc.DocPtr()) verdictPtrs := xPath.Evaluate(doc.Root().NodePtr(), xpath.Compile("//rs[@type='verdictDescription']")) verdicts := make([]Verdict, len(verdictPtrs)) for i, nodePtr := range verdictPtrs { node := xml.NewNode(nodePtr, doc) verdict := Verdict{} verdict.Id = node.Attr("id") verdict.Desc = cleanUpContent(node.Content()) verdict.SetType(getInterp(nodePtr, "verdictCategory", doc)) verdicts[i] = verdict } record.Verdicts = verdicts }
func (record *Record) processOffences(doc *html.HtmlDocument) { xPath := xpath.NewXPath(doc.DocPtr()) offencePtrs := xPath.Evaluate(doc.Root().NodePtr(), xpath.Compile("//rs[@type='offenceDescription']")) offences := make([]Offence, len(offencePtrs)) for i, nodePtr := range offencePtrs { node := xml.NewNode(nodePtr, doc) offence := Offence{} offence.Id = node.Attr("id") offence.Category = getInterp(nodePtr, "offenceCategory", doc) offence.SubCategory = getInterp(nodePtr, "offenceSubcategory", doc) offence.Desc = cleanUpContent(node.Content()) offences[i] = offence } record.Offences = offences }
func docSearch(doc *ghtml.HtmlDocument, elementName string, pageName string, xpath string, mustFind bool) []gxml.Node { elementArray, err := doc.Root().Search(xpath) if (err != nil || len(elementArray) == 0) && mustFind == false { return nil } if err != nil { fmt.Fprintf(os.Stderr, "Error locating element \"%s\" in page %s (incorrect xpath?): %v\n", elementName, pageName, err) //fmt.Fprintf(os.Stderr, " doc=%+v\n", doc) os.Exit(1) } if len(elementArray) == 0 { fmt.Fprintf(os.Stderr, "Error locating element \"%s\" in page %s (incorrect xpath?): len() == 0\n", elementName, pageName) //fmt.Fprintf(os.Stderr, " doc=%+v\n", doc) os.Exit(1) } return elementArray }
func (record *Record) processPersons(doc *html.HtmlDocument) { xPath := xpath.NewXPath(doc.DocPtr()) personPtrs := xPath.Evaluate(doc.Root().NodePtr(), xpath.Compile("//persname")) persons := make([]Person, len(personPtrs)) for i, nodePtr := range personPtrs { node := xml.NewNode(nodePtr, doc) person := Person{} person.Id = node.Attr("id") person.GivenName = getInterp(nodePtr, "given", doc) person.Surname = getInterp(nodePtr, "surname", doc) person.SetType(node.Attr("type")) person.SetGender(getInterp(nodePtr, "gender", doc)) persons[i] = person } record.Persons = persons }
// ProcessField method fetches data from passed document func (f *Field) ProcessField(d *html.HtmlDocument) interface{} { var value interface{} var node xml.Node selector := xpath.Compile(f.Selector) result, _ := d.Root().Search(selector) if len(result) > 0 { node = result[0] } else { return "" } if f.Callback != nil { value = f.Callback(&node) } else { value = node.Content() } return value }