Пример #1
0
func NewRecord(content []byte) (record *Record) {
	doc, err := gokogiri.ParseHtml([]byte(content))
	if err != nil {
		panic(err)
	}

	displayText := cleanUpContent(doc.String())
	record = &Record{RawText: content, DisplayText: displayText}
	dateStr := getInterp(doc.Root().NodePtr(), "date", doc)
	date, err := time.Parse("20060102", dateStr)
	if err != nil {
		record.Date = nil
	} else {
		record.Date = &date
	}

	xPath := xpath.NewXPath(doc.DocPtr())
	nodePtrs := xPath.Evaluate(doc.Root().NodePtr(),
		xpath.Compile("//div1"))

	node := xml.NewNode(nodePtrs[0], doc)
	record.Id = node.Attr("id")
	record.Type = node.Attr("type")

	record.processPersons(doc)
	record.processOffences(doc)
	record.processVerdicts(doc)
	record.processOffJoins(doc)
	return
}
Пример #2
0
func (record *Record) processOffJoins(doc *html.HtmlDocument) {
	xPath := xpath.NewXPath(doc.DocPtr())
	// join the offence with the defendants and verdict
	joinPtrs := xPath.Evaluate(doc.Root().NodePtr(),
		xpath.Compile("//join[@result='criminalCharge']"))

	for _, nodePtr := range joinPtrs {
		node := xml.NewNode(nodePtr, doc)
		targets := strings.Split(node.Attr("targets"), " ")
		var personId, offId, verdictId string
		for _, targetId := range targets {
			if strings.Contains(targetId, "defend") {
				personId = targetId
			}
			if strings.Contains(targetId, "off") {
				offId = targetId
			}
			if strings.Contains(targetId, "verdict") {
				verdictId = targetId
			}
		}
		offence := record.findOffence(offId)
		if offence == nil {
			panic("couldn't find offence " + offId)
		}
		person := record.findPerson(personId)
		if person != nil {
			offence.Defendants = append(offence.Defendants, person)
		}
		verdict := record.findVerdict(verdictId)
		if verdict != nil {
			offence.Verdict = verdict
		}
	}
}
Пример #3
0
// get the value out of an <interp> tag
func getInterp(basePtr unsafe.Pointer, interpType string, doc *html.HtmlDocument) (value string) {
	xPath := xpath.NewXPath(doc.DocPtr())
	nodePtrs := xPath.Evaluate(basePtr, xpath.Compile(".//interp[@type='"+
		interpType+"']"))
	if len(nodePtrs) == 1 {
		node := xml.NewNode(nodePtrs[0], doc)
		value = node.Attr("value")
	}
	return
}
Пример #4
0
func (record *Record) processVerdicts(doc *html.HtmlDocument) {
	xPath := xpath.NewXPath(doc.DocPtr())
	verdictPtrs := xPath.Evaluate(doc.Root().NodePtr(),
		xpath.Compile("//rs[@type='verdictDescription']"))
	verdicts := make([]Verdict, len(verdictPtrs))

	for i, nodePtr := range verdictPtrs {
		node := xml.NewNode(nodePtr, doc)
		verdict := Verdict{}
		verdict.Id = node.Attr("id")
		verdict.Desc = cleanUpContent(node.Content())
		verdict.SetType(getInterp(nodePtr, "verdictCategory", doc))
		verdicts[i] = verdict
	}
	record.Verdicts = verdicts
}
Пример #5
0
//NewDocument wraps the pointer to the C struct.
//
// TODO: this should probably not be exported.
func NewDocument(p unsafe.Pointer, contentLen int, inEncoding, outEncoding []byte) (doc *XmlDocument) {
	inEncoding = AppendCStringTerminator(inEncoding)
	outEncoding = AppendCStringTerminator(outEncoding)

	xmlNode := &XmlNode{Ptr: (*C.xmlNode)(p)}
	docPtr := (*C.xmlDoc)(p)
	doc = &XmlDocument{Ptr: docPtr, Node: xmlNode, InEncoding: inEncoding, OutEncoding: outEncoding, InputLen: contentLen}
	doc.UnlinkedNodes = make(map[*C.xmlNode]bool)
	doc.XPathCtx = xpath.NewXPath(p)
	doc.Type = xmlNode.NodeType()
	doc.fragments = make([]*DocumentFragment, 0, initialFragments)
	doc.Me = doc
	xmlNode.Document = doc
	//runtime.SetFinalizer(doc, (*XmlDocument).Free)
	return
}
Пример #6
0
func (record *Record) processOffences(doc *html.HtmlDocument) {
	xPath := xpath.NewXPath(doc.DocPtr())
	offencePtrs := xPath.Evaluate(doc.Root().NodePtr(),
		xpath.Compile("//rs[@type='offenceDescription']"))
	offences := make([]Offence, len(offencePtrs))

	for i, nodePtr := range offencePtrs {
		node := xml.NewNode(nodePtr, doc)
		offence := Offence{}
		offence.Id = node.Attr("id")
		offence.Category = getInterp(nodePtr, "offenceCategory", doc)
		offence.SubCategory = getInterp(nodePtr, "offenceSubcategory", doc)
		offence.Desc = cleanUpContent(node.Content())
		offences[i] = offence
	}
	record.Offences = offences
}
Пример #7
0
func (record *Record) processPersons(doc *html.HtmlDocument) {
	xPath := xpath.NewXPath(doc.DocPtr())
	personPtrs := xPath.Evaluate(doc.Root().NodePtr(),
		xpath.Compile("//persname"))
	persons := make([]Person, len(personPtrs))

	for i, nodePtr := range personPtrs {
		node := xml.NewNode(nodePtr, doc)
		person := Person{}
		person.Id = node.Attr("id")
		person.GivenName = getInterp(nodePtr, "given", doc)
		person.Surname = getInterp(nodePtr, "surname", doc)
		person.SetType(node.Attr("type"))
		person.SetGender(getInterp(nodePtr, "gender", doc))
		persons[i] = person
	}
	record.Persons = persons
}
Пример #8
0
func (store *Store) LoadPrice(url string) (price float64, err error) {
	resp, err := http.Get(url)
	if err != nil {
		return
	}

	defer resp.Body.Close()

	body, err := ioutil.ReadAll(resp.Body)

	if err != nil {
		return
	}

	doc, err := gokogiri.ParseHtml(body)

	if err != nil {
		return
	}

	nxpath := xpath.NewXPath(doc.DocPtr())
	nodes, err := nxpath.Evaluate(doc.DocPtr(), store.compiledXPath)

	if err != nil {
		return
	}

	if len(nodes) == 0 {
		fmt.Printf("Check XPath correctness (not found) for domain: %s\n", store.Domain)
		return
	}

	price_raw := xml.NewNode(nodes[0], doc).InnerHtml()
	price_raw = strings.Trim(price_raw, "$ \n\r")
	price, err = strconv.ParseFloat(price_raw, 64)

	if err != nil {
		fmt.Printf("Check XPath correctness (not monetary) for domain: %s\n", store.Domain)
		return
	}

	return
}