func NewRecord(content []byte) (record *Record) { doc, err := gokogiri.ParseHtml([]byte(content)) if err != nil { panic(err) } displayText := cleanUpContent(doc.String()) record = &Record{RawText: content, DisplayText: displayText} dateStr := getInterp(doc.Root().NodePtr(), "date", doc) date, err := time.Parse("20060102", dateStr) if err != nil { record.Date = nil } else { record.Date = &date } xPath := xpath.NewXPath(doc.DocPtr()) nodePtrs := xPath.Evaluate(doc.Root().NodePtr(), xpath.Compile("//div1")) node := xml.NewNode(nodePtrs[0], doc) record.Id = node.Attr("id") record.Type = node.Attr("type") record.processPersons(doc) record.processOffences(doc) record.processVerdicts(doc) record.processOffJoins(doc) return }
func (record *Record) processOffJoins(doc *html.HtmlDocument) { xPath := xpath.NewXPath(doc.DocPtr()) // join the offence with the defendants and verdict joinPtrs := xPath.Evaluate(doc.Root().NodePtr(), xpath.Compile("//join[@result='criminalCharge']")) for _, nodePtr := range joinPtrs { node := xml.NewNode(nodePtr, doc) targets := strings.Split(node.Attr("targets"), " ") var personId, offId, verdictId string for _, targetId := range targets { if strings.Contains(targetId, "defend") { personId = targetId } if strings.Contains(targetId, "off") { offId = targetId } if strings.Contains(targetId, "verdict") { verdictId = targetId } } offence := record.findOffence(offId) if offence == nil { panic("couldn't find offence " + offId) } person := record.findPerson(personId) if person != nil { offence.Defendants = append(offence.Defendants, person) } verdict := record.findVerdict(verdictId) if verdict != nil { offence.Verdict = verdict } } }
// get the value out of an <interp> tag func getInterp(basePtr unsafe.Pointer, interpType string, doc *html.HtmlDocument) (value string) { xPath := xpath.NewXPath(doc.DocPtr()) nodePtrs := xPath.Evaluate(basePtr, xpath.Compile(".//interp[@type='"+ interpType+"']")) if len(nodePtrs) == 1 { node := xml.NewNode(nodePtrs[0], doc) value = node.Attr("value") } return }
func (record *Record) processVerdicts(doc *html.HtmlDocument) { xPath := xpath.NewXPath(doc.DocPtr()) verdictPtrs := xPath.Evaluate(doc.Root().NodePtr(), xpath.Compile("//rs[@type='verdictDescription']")) verdicts := make([]Verdict, len(verdictPtrs)) for i, nodePtr := range verdictPtrs { node := xml.NewNode(nodePtr, doc) verdict := Verdict{} verdict.Id = node.Attr("id") verdict.Desc = cleanUpContent(node.Content()) verdict.SetType(getInterp(nodePtr, "verdictCategory", doc)) verdicts[i] = verdict } record.Verdicts = verdicts }
//NewDocument wraps the pointer to the C struct. // // TODO: this should probably not be exported. func NewDocument(p unsafe.Pointer, contentLen int, inEncoding, outEncoding []byte) (doc *XmlDocument) { inEncoding = AppendCStringTerminator(inEncoding) outEncoding = AppendCStringTerminator(outEncoding) xmlNode := &XmlNode{Ptr: (*C.xmlNode)(p)} docPtr := (*C.xmlDoc)(p) doc = &XmlDocument{Ptr: docPtr, Node: xmlNode, InEncoding: inEncoding, OutEncoding: outEncoding, InputLen: contentLen} doc.UnlinkedNodes = make(map[*C.xmlNode]bool) doc.XPathCtx = xpath.NewXPath(p) doc.Type = xmlNode.NodeType() doc.fragments = make([]*DocumentFragment, 0, initialFragments) doc.Me = doc xmlNode.Document = doc //runtime.SetFinalizer(doc, (*XmlDocument).Free) return }
func (record *Record) processOffences(doc *html.HtmlDocument) { xPath := xpath.NewXPath(doc.DocPtr()) offencePtrs := xPath.Evaluate(doc.Root().NodePtr(), xpath.Compile("//rs[@type='offenceDescription']")) offences := make([]Offence, len(offencePtrs)) for i, nodePtr := range offencePtrs { node := xml.NewNode(nodePtr, doc) offence := Offence{} offence.Id = node.Attr("id") offence.Category = getInterp(nodePtr, "offenceCategory", doc) offence.SubCategory = getInterp(nodePtr, "offenceSubcategory", doc) offence.Desc = cleanUpContent(node.Content()) offences[i] = offence } record.Offences = offences }
func (record *Record) processPersons(doc *html.HtmlDocument) { xPath := xpath.NewXPath(doc.DocPtr()) personPtrs := xPath.Evaluate(doc.Root().NodePtr(), xpath.Compile("//persname")) persons := make([]Person, len(personPtrs)) for i, nodePtr := range personPtrs { node := xml.NewNode(nodePtr, doc) person := Person{} person.Id = node.Attr("id") person.GivenName = getInterp(nodePtr, "given", doc) person.Surname = getInterp(nodePtr, "surname", doc) person.SetType(node.Attr("type")) person.SetGender(getInterp(nodePtr, "gender", doc)) persons[i] = person } record.Persons = persons }
func (store *Store) LoadPrice(url string) (price float64, err error) { resp, err := http.Get(url) if err != nil { return } defer resp.Body.Close() body, err := ioutil.ReadAll(resp.Body) if err != nil { return } doc, err := gokogiri.ParseHtml(body) if err != nil { return } nxpath := xpath.NewXPath(doc.DocPtr()) nodes, err := nxpath.Evaluate(doc.DocPtr(), store.compiledXPath) if err != nil { return } if len(nodes) == 0 { fmt.Printf("Check XPath correctness (not found) for domain: %s\n", store.Domain) return } price_raw := xml.NewNode(nodes[0], doc).InnerHtml() price_raw = strings.Trim(price_raw, "$ \n\r") price, err = strconv.ParseFloat(price_raw, 64) if err != nil { fmt.Printf("Check XPath correctness (not monetary) for domain: %s\n", store.Domain) return } return }