/
parse.go
133 lines (119 loc) · 3.53 KB
/
parse.go
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
package main
import (
"regexp"
"strconv"
"strings"
"time"
"github.com/PuerkitoBio/goquery"
"golang.org/x/net/html"
)
// Regular expressions used to pick apart the text scraped from the page.
var (
	// nameRegexp splits a train denomination into three groups: the text
	// before the number, the number itself, and whatever follows the number.
	nameRegexp = regexp.MustCompile(`(.+) (\d+) ?(.*)?`)

	// routeRegexp captures, in order: the first parenthesised timestamp, the
	// text before "->", the text after "->", and the second parenthesised
	// timestamp.
	routeRegexp = regexp.MustCompile(`\((\d\d\.\d\d\. \d\d:\d\d)\) (.*) -> (.*) \((\d\d\.\d\d\. \d\d:\d\d)\)`)

	// delayRegexp matches a run of decimal digits.
	delayRegexp = regexp.MustCompile(`\d+`)
)
// TrainInfo bundles the general information about a train with its current
// position and delay, exactly as downloaded from ŽSR's webpage.
type TrainInfo struct {
	// Category and Name are the textual parts of the train's denomination.
	Category string
	Name     string
	// Number is the numeric part of the train's denomination.
	Number int
	// Current is the most recently reported position and delay.
	Current Delay
	// From and To are the scheduled endpoints of the train's route.
	From Location
	To   Location
}
// Location is the position of a train at a given moment. It is also used for
// scheduled positions, as in the From and To fields of TrainInfo.
type Location struct {
	// Station is the name of the station.
	Station string
	// Time is the moment associated with the station.
	Time time.Time
}
// Delay represents the current position of the train.
type Delay struct {
	// Location holds the station from which the information was acquired
	// together with the scheduled arrival at that station.
	Location

	// Actually is the real arrival time at the station.
	Actually time.Time

	// Delay is the delay as calculated by ŽSR.
	Delay int
}
// Parse downloads the train movement page from ŽSR's website and parses it
// into a slice of TrainInfo structs, one per ".accordionHeader" element.
//
// Headers and ".trainDelayTable" elements are paired by their position in the
// document. If the page has fewer tables than headers, the unmatched entries
// keep only the data taken from their header and a zero-valued Current.
//
// A non-nil error is returned only when the page cannot be fetched or parsed
// as HTML; in that case the returned slice is nil.
//
// NOTE(review): recent goquery releases mark NewDocument as deprecated in
// favour of building the request explicitly — consider migrating.
func Parse() (locations []TrainInfo, err error) {
	doc, err := goquery.NewDocument("http://tis.zsr.sk/elis/pohybvlaku?jazyk_stranky=sk")
	if err != nil {
		return nil, err
	}

	delayHeaders := doc.Find(".accordionHeader")
	delayTables := doc.Find(".trainDelayTable")
	tableCount := len(delayTables.Nodes)

	locations = make([]TrainInfo, len(delayHeaders.Nodes))
	delayHeaders.Each(func(i int, element *goquery.Selection) {
		parseHeader(element, &locations[i])
		// Selection.Get indexes the underlying node slice directly, so asking
		// for a table that does not exist would panic.
		if i < tableCount {
			parseTable(delayTables.Get(i), &locations[i])
		}
	})
	return locations, nil
}
// parseHeader fills the denomination and route fields of info from the
// "span" elements found under element. The span at index 0 is parsed as the
// train denomination (Category, Number, Name) and the span at index 2 as the
// route (From, To); every other span is ignored.
func parseHeader(element *goquery.Selection, info *TrainInfo) {
	spans := element.Find("span")
	spans.Each(func(idx int, span *goquery.Selection) {
		if idx == 0 {
			info.Category, info.Number, info.Name = parseTrainDenomination(span.Text())
			return
		}
		if idx == 2 {
			info.From, info.To = parseTrainRoute(span.Text())
		}
	})
}
// parseTable fills info.Current from the rows of a train's delay table.
//
// Rows are taken in the order produced by flattenTable and interpreted by
// index:
//
//	0: delay (0 when the row has a single cell, otherwise the first number
//	   found in the row's text)
//	1: station name
//	2: actual arrival time
//	3: scheduled arrival time
//
// For rows 1-3 the value is the Data of the first child node of the row's
// second cell. Rows that lack a second cell, or whose second cell is empty,
// are skipped and leave the corresponding field untouched. A nil element is a
// no-op.
func parseTable(element *html.Node, info *TrainInfo) {
	if element == nil {
		return
	}

	for i, row := range flattenTable(element) {
		cells := row.Find("td")
		switch i {
		case 0:
			if len(cells.Nodes) == 1 {
				info.Current.Delay = 0
				continue
			}
			// A row without any digits yields an empty match, which Atoi
			// rejects; that case is deliberately recorded as "no delay".
			delay, err := strconv.Atoi(delayRegexp.FindString(cells.Text()))
			if err != nil {
				delay = 0
			}
			info.Current.Delay = delay
		case 1:
			if text, ok := secondCellData(cells); ok {
				info.Current.Station = text
			}
		case 2:
			if text, ok := secondCellData(cells); ok {
				info.Current.Actually = parseTime(text)
			}
		case 3:
			if text, ok := secondCellData(cells); ok {
				info.Current.Time = parseTime(text)
			}
		}
	}
}

// secondCellData returns the Data of the first child node of the second cell
// in cells. The boolean is false when there is no second cell or it has no
// children.
func secondCellData(cells *goquery.Selection) (string, bool) {
	if len(cells.Nodes) < 2 {
		return "", false
	}
	child := cells.Get(1).FirstChild
	if child == nil {
		return "", false
	}
	return child.Data, true
}
// flattenTable collects the "tr" elements found under table, in document
// order, and returns those whose text content is non-empty. The result is
// always a non-nil slice.
//
// RemoveFiltered("tr") is invoked on every row before its text is inspected.
// NOTE(review): presumably this detaches the row from its parent so that rows
// of nested tables end up as independent entries — confirm against goquery's
// documentation before reordering these calls.
func flattenTable(table *html.Node) (selections []*goquery.Selection) {
	selections = []*goquery.Selection{}
	rows := goquery.NewDocumentFromNode(table).Find("tr")
	rows.Each(func(_ int, tr *goquery.Selection) {
		tr.RemoveFiltered("tr")
		if tr.Text() == "" {
			return
		}
		selections = append(selections, tr)
	})
	return selections
}
// parseTrainDenomination splits a train denomination such as "Os 1234 Tatran"
// into its category ("Os"), number (1234) and optional name ("Tatran").
//
// The name has surrounding spaces removed and is empty when nothing follows
// the number. If d does not match nameRegexp at all, all three results are
// zero values. If the digits do not fit into an int, number is 0.
func parseTrainDenomination(d string) (category string, number int, name string) {
	result := nameRegexp.FindStringSubmatch(d)
	if result == nil {
		// No match: indexing result would panic, so report "nothing parsed".
		return "", 0, ""
	}

	category = result[1]
	// The pattern guarantees digits only, so Atoi can fail solely on overflow.
	if n, err := strconv.Atoi(result[2]); err == nil {
		number = n
	}
	name = strings.Trim(result[3], " ")
	return category, number, name
}
// parseTrainRoute parses a route description of the form
// "(dd.mm. hh:mm) Origin -> Destination (dd.mm. hh:mm)" into the scheduled
// departure and arrival locations. The first timestamp belongs to from, the
// second to to.
//
// If r does not match routeRegexp, both results are zero-valued Locations.
func parseTrainRoute(r string) (from, to Location) {
	result := routeRegexp.FindStringSubmatch(r)
	if result == nil {
		// No match: indexing result would panic, so return empty locations.
		return from, to
	}

	from = Location{Station: result[2], Time: parseTime(result[1])}
	to = Location{Station: result[3], Time: parseTime(result[4])}
	return from, to
}
// parseTime interprets str as a "day.month. hour:minute" timestamp in UTC and
// places it in the current calendar year. Input that does not match the
// layout yields the zero time.Time.
func parseTime(str string) time.Time {
	const layout = "02.01. 15:04"

	parsed, err := time.ParseInLocation(layout, str, time.UTC)
	if err != nil {
		return time.Time{}
	}

	// The layout has no year component, so the parsed value lands in year 0;
	// adding the current year moves it into this year.
	thisYear := time.Now().Year()
	return parsed.AddDate(thisYear, 0, 0)
}