func trNodeToSchedule(scheduleNode xml.Node) (item ScheduleItem, err error) { results, err := scheduleNode.Search("./td/text()") if err != nil { return ScheduleItem{}, err } item = ScheduleItem{ TrainNumber: strings.TrimSpace(results[1].String()), Misc: strings.TrimSpace(results[2].String()), Class: strings.TrimSpace(results[3].String()), Relation: strings.TrimSpace(results[4].String()), StartingStation: strings.TrimSpace(results[5].String()), CurrentStation: strings.TrimSpace(results[6].String()), ArrivingTime: strings.TrimSpace(results[7].String()), DepartingTime: strings.TrimSpace(results[8].String()), Ls: strings.TrimSpace(results[9].String()), } if len(results) > 10 { item.Status = strings.TrimSpace(results[10].String()) } stationParts := strings.FieldsFunc(item.Relation, func(r rune) bool { return r == '-' }) item.EndStation = stationParts[1] // [ANGKE BOGOR] BOGOR is end station return }
func Link(performance xml.Node) string { anchor, err := performance.Search(".//a") if err != nil { fmt.Println(err) } return "http://www.bso.org" + anchor[0].Attr("href") }
func getContent(node gxtml.Node, cssQuery string) string { result, err := node.Search(toXpath(cssQuery)) if err != nil { panic(fmt.Errorf("Failed to find %v node", cssQuery)) } return result[0].Content() }
func parseSource(m xml.Node) string { res, _ := m.Search("source") if len(res) > 0 { return res[0].Content() } return "" }
func parseRights(m xml.Node) string { res, _ := m.Search("rights") if len(res) > 0 { return res[0].Content() } return "" }
func parsePublisher(m xml.Node) string { publisher := "" res, _ := m.Search("publisher") if len(res) > 0 { publisher = res[0].Content() } return publisher }
func parseLanguages(m xml.Node) []string { languages := []string{} res, _ := m.Search("language") for _, n := range res { languages = append(languages, n.Content()) } return languages }
func parseTitles(m xml.Node) []string { titles := []string{} res, _ := m.Search("title") for _, n := range res { titles = append(titles, n.Content()) } return titles }
func parseSubjects(m xml.Node) []string { subjects := []string{} res, _ := m.Search("subject") for _, n := range res { subjects = append(subjects, n.Content()) } return subjects }
func parseDescription(m xml.Node) string { description := "" res, _ := m.Search("description") if len(res) > 0 { description = res[0].Content() } return description }
func parseIdentifiers(m xml.Node) []*Identifier { identifiers := []*Identifier{} res, _ := m.Search("identifier") for _, n := range res { identifier := Identifier{Identifier: n.Content(), Scheme: n.Attr("scheme")} identifiers = append(identifiers, &identifier) } return identifiers }
func Pieces(details xml.Node) string { pieces, _ := details.Search(".//div[@class='program-media-collapse']/h3") var piecesString string piecesString = "<ul class=\"works\">" for _, piece := range pieces { piecesString += "<li>" piecesString += piece.Content() piecesString += "</li>" } piecesString += "</ul>" return piecesString }
func linkDensity(node xml.Node) float64 { links, err := node.Search("a") if err != nil { return 0.0 } llength := 0.0 for _, link := range links { llength += float64(len(link.Content())) } tlength := float64(len(node.Content())) return llength / tlength }
func processNode(node xml.Node, row string) { row = row + node.Attr("TEXT") + "|" kids, err := node.Search("node") if err != nil { log.Println("Error searching for node:", err) return } if len(kids) > 0 { // has children, not a leaf node for i := range kids { processNode(kids[i], row) } } else { fmt.Println(row) // print leaf node } }
/* Not working because: http://www.sc2ratings.com/players.php?realname=Yang,%20Hee-Soo is parsed as: http://www.sc2ratings.com/players.php?realname=Yang, Hee-Soo */ func parseLeagues(player xml.Node) []string { out := []string{} partialUrl, err := player.Search(".//a/@href") errorHandler(err) if len(partialUrl) == 1 { playerPageUrl := "http://www.sc2ratings.com/" + partialUrl[0].String() playerPageSource := retrievePageSource(playerPageUrl) playerPage, err := gokogiri.ParseHtml(playerPageSource) errorHandler(err) defer playerPage.Free() fmt.Println(playerPage) } return out }
func parseDates(m xml.Node) []*Date { dates := []*Date{} res, _ := m.Search("date") for _, n := range res { date := Date{Date: n.Content(), Event: n.Attr("event")} dates = append(dates, &date) } res, _ = m.Search("meta[@property='dcterms:modified']") if len(res) > 0 { date := Date{Date: res[0].Content(), Event: "modified"} dates = append(dates, &date) } return dates }
func loadInputs(doc xml.Node) ([]*Input, error) { nodes, e := doc.Search(".//input") if e != nil { return nil, e } out := []*Input{} for _, n := range nodes { i := &Input{ Type: n.Attr("type"), Name: n.Attr("name"), Value: n.Attr("value"), Checked: n.Attr("checked") == "checked", } out = append(out, i) } return out, nil }
func Title(performance xml.Node) string { title, _ := performance.Search(".//div[@class='performance-title']") return title[0].Content() }
func Time(performance xml.Node) string { time, _ := performance.Search(".//span[@class='performance-time']") return time[0].Content() }
func Day(performance xml.Node) string { day, _ := performance.Search(".//span[contains(@class, 'performance-day')]") return day[0].Content() }
// Parse a match row into a managed object func GetParsedMatch(n xml.Node) (pm *ParsedMatch, err error) { pm = &ParsedMatch{} match_url, _ := n.Search("td/a/@href") red, _ := n.Search("td/a/span[@class='redtext']/text()") redvalue, _ := n.Search("td/a/span[@class='redtext']/following-sibling::text()") blue, _ := n.Search("td/a/span[@class='bluetext']/text()") bluevalue, _ := n.Search("td/a/span[@class='bluetext']/following-sibling::text()") winner, _ := n.Search("td[position() = 2]/span/text()") bettors, _ := n.Search("td[last()]/text()") if len(match_url) > 0 { pm.MatchId, _ = strconv.Atoi(numRx.FindString(match_url[0].String())) } if len(redvalue) > 0 { pm.RedBets, _ = strconv.Atoi(numRx.FindString(redvalue[0].String())) } if len(bluevalue) > 0 { pm.BlueBets, _ = strconv.Atoi(numRx.FindString(bluevalue[0].String())) } pm.Red = nameSub(html.UnescapeString(red[0].String())) pm.Blue = nameSub(html.UnescapeString(blue[0].String())) pm.Bettors, _ = strconv.Atoi(bettors[0].String()) if len(winner) > 0 { pm.Winner = nameSub(html.UnescapeString(winner[0].String())) if pm.Winner == pm.Red { pm.FightWinner = spicerack.WINNER_RED } else if pm.Winner == pm.Blue { pm.FightWinner = spicerack.WINNER_BLUE } } if pm.MatchId == 0 { err = errors.New("Unable to parse match id.") } else if len(pm.Red) == 0 || len(pm.Blue) == 0 { err = errors.New("Red or Blue fighter is an empty string.") } else if int(pm.FightWinner) == 0 { err = errors.New("No winner found.") } else if pm.MatchId < 51966 { err = errors.New("Pre-matchmaking fight. Ignored.") } return pm, err }