/
resulthandler.go
96 lines (83 loc) · 1.89 KB
/
resulthandler.go
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
package main
import (
"bytes"
"io"
"log"
"net/http"
"net/url"
"appengine"
"appengine/urlfetch"
"code.google.com/p/cascadia"
"code.google.com/p/go.net/html"
"github.com/gorilla/schema"
)
// ResultHandler decodes the submitted form into a Selection, fetches the
// requested URL through the App Engine urlfetch client, and renders the nodes
// matching the selector with the "result.html" template.
//
// Requests whose form cannot be parsed or decoded are answered with
// 400 Bad Request. Requests with an empty selector or URL are redirected to
// the home page. A template failure is answered with 500 and nothing else is
// written to the response.
func ResultHandler(w http.ResponseWriter, r *http.Request) {
	var s = new(Selection) // pointer to a zero-valued Selection filled by the decoder
	var decoder = schema.NewDecoder()

	if err := r.ParseForm(); err != nil {
		// A malformed form is the client's fault, not a server failure.
		http.Error(w, err.Error(), http.StatusBadRequest)
		return
	}
	if err := decoder.Decode(s, r.Form); err != nil {
		http.Error(w, err.Error(), http.StatusBadRequest)
		return
	}

	if s.Selector == "" || s.URL == "" {
		// Nothing to match against: send the user back to the home page.
		http.Redirect(w, r, "/", http.StatusFound)
		return
	}

	c := appengine.NewContext(r)
	client := urlfetch.Client(c)
	result := matchSelector(s, client)

	// Render into a buffer first so a template failure never leaves a
	// half-written page in the response.
	buf := new(bytes.Buffer)
	x := struct {
		Result []string
		S      Selection
	}{result, *s}
	if err := templates.ExecuteTemplate(buf, "result.html", x); err != nil {
		http.Error(w, err.Error(), http.StatusInternalServerError)
		// Stop here: copying buf would append partial output to the error body.
		return
	}

	if _, err := io.Copy(w, buf); err != nil {
		// The status line is already sent; all that is left is to record it.
		log.Printf("ResultHandler: writing response: %v", err)
	}
}
// matchSelector fetches s.URL with client, parses the response body as HTML,
// and returns one string (see nodeString) for every node matched by the CSS
// selector s.Selector.
//
// Any failure (unparsable URL, invalid selector, fetch error, HTML parse
// error) is logged and reported as a nil result. Both the URL and the
// selector come from the request, so a bad value must not terminate the
// process.
func matchSelector(s *Selection, client *http.Client) []string {
	link, err := url.Parse(s.URL)
	if err != nil {
		log.Printf("matchSelector: parsing url %q: %v", s.URL, err)
		return nil
	}

	// Compile the selector before fetching so an invalid selector costs no
	// network round trip.
	sel, err := cascadia.Compile(s.Selector)
	if err != nil {
		log.Printf("matchSelector: compiling selector %q: %v", s.Selector, err)
		return nil
	}

	resp, err := client.Get(link.String())
	if err != nil {
		log.Printf("matchSelector: fetching %q: %v", link.String(), err)
		return nil
	}
	// The body must always be closed, otherwise the connection leaks.
	defer resp.Body.Close()

	doc, err := html.Parse(resp.Body)
	if err != nil {
		log.Printf("matchSelector: parsing html from %q: %v", link.String(), err)
		return nil
	}

	var result []string
	for _, m := range sel.MatchAll(doc) {
		result = append(result, nodeString(m))
	}
	return result
}
// nodeString converts a parsed HTML node into display text: a text node
// yields its data, an element node yields the string form of a start-tag
// token built from its name and attributes, and every other node type yields
// the empty string.
func nodeString(n *html.Node) string {
	if n.Type == html.TextNode {
		return n.Data
	}
	if n.Type != html.ElementNode {
		return ""
	}
	startTag := html.Token{
		Type: html.StartTagToken,
		Data: n.Data,
		Attr: n.Attr,
	}
	return startTag.String()
}