/
spider_InTheaters.go
executable file
·136 lines (121 loc) · 3.02 KB
/
spider_InTheaters.go
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
package main
import (
	"encoding/json"
	"fmt"
	"net/url"
	"strconv"
	"strings"
	"time"

	"github.com/PuerkitoBio/goquery"
	"github.com/boltdb/bolt"
	"github.com/celrenheit/spider"
)
// Main data structures

type (
	// InTheatersGroup is the collection of movies gathered by one run of the
	// in-theaters spider. It is the value that is JSON-encoded and stored in
	// the database.
	InTheatersGroup struct {
		// Data holds the movies that passed the spider's filter.
		Data []Movie
	}

	// Movie describes a single scraped movie. Field order matters: values
	// are built with positional composite literals.
	Movie struct {
		// Title is the movie title text taken from the listing.
		Title string
		// Date is the release date text exactly as scraped (not parsed).
		Date string
		// Score is the numeric review score parsed from the listing.
		Score int
		// Url is a search link for the movie's showtimes.
		Url string
	}
)
// Database functions

// Get loads the InTheatersGroup stored in the bucket named bucketName into p.
//
// It returns an error when the package-level open flag is false or when the
// stored value cannot be decoded. A missing bucket, or a bucket without an
// "InTheatersGroup" key, is not an error: Get returns nil and leaves p
// unchanged.
func (p *InTheatersGroup) Get(bucketName string) error {
	if !open {
		return fmt.Errorf("db must be opened before reading")
	}
	err := db.View(func(tx *bolt.Tx) error {
		b := tx.Bucket([]byte(bucketName))
		if b == nil {
			// Nothing has been stored under this bucket name.
			return nil
		}
		val := b.Get([]byte("InTheatersGroup"))
		if val == nil {
			// The bucket exists but holds no group.
			return nil
		}
		return p.decode(val)
	})
	if err != nil {
		// The error is returned unwrapped so callers see the same value as
		// before; the diagnostic line is kept, now newline-terminated.
		fmt.Printf("Could not get InTheatersGroup: %s\n", err)
		return err
	}
	return nil
}
// save writes p, JSON-encoded, under the "InTheatersGroup" key of the bucket
// named by Today(), creating the bucket when it does not exist yet.
//
// It returns an error when the package-level open flag is false, when the
// bucket cannot be created, when p cannot be encoded, or when the write fails.
func (p *InTheatersGroup) save() error {
	day := Today()
	if !open {
		return fmt.Errorf("db must be opened before saving!")
	}

	key := []byte("InTheatersGroup")
	return db.Update(func(tx *bolt.Tx) error {
		b, err := tx.CreateBucketIfNotExists([]byte(day))
		if err != nil {
			return fmt.Errorf("create bucket: %s", err)
		}

		payload, err := p.encode()
		if err != nil {
			return fmt.Errorf("could not encode InTheatersGroup: %s", err)
		}

		return b.Put(key, payload)
	})
}
// encode returns the JSON representation of p, or the error reported by
// json.Marshal (in which case the returned slice is nil).
func (p *InTheatersGroup) encode() ([]byte, error) {
	return json.Marshal(p)
}
// decode fills p from the JSON document in data.
//
// It returns the error reported by json.Unmarshal when data is not valid JSON
// or does not fit the shape of an InTheatersGroup.
func (p *InTheatersGroup) decode(data []byte) error {
	// p is already a pointer to the destination; passing &p would hand
	// Unmarshal a **InTheatersGroup and add a pointless level of indirection.
	return json.Unmarshal(data, p)
}
// Define the spider

// InTheatersSpider is the spider for Metacritic's "movies in theaters"
// listing. It is assigned in init.
var InTheatersSpider spider.Spider

// inTheatersMonths returns the three-letter English abbreviations of the
// calendar month containing now and of the calendar month before it.
//
// The previous month is derived from the month number rather than by
// subtracting 30 days: on the 31st, "30 days ago" is still the current month,
// and on March 1 it lands in January.
func inTheatersMonths(now time.Time) (cur, prev string) {
	m := now.Month()
	p := m - 1
	if p < time.January {
		// January wraps around to December.
		p = time.December
	}
	return m.String()[:3], p.String()[:3]
}

// inTheatersShowtimesURL builds the Google search link stored for a movie.
// The query is "showtimes <title>", query-escaped so that characters such as
// '&', '#' or '?' in a title cannot break the URL; spaces become '+'.
func inTheatersShowtimesURL(title string) string {
	q := url.QueryEscape("showtimes " + title)
	return "https://www.google.com/?q=" + q + "#safe=active&q=" + q
}

// init builds InTheatersSpider. The handler fetches the listing page, keeps
// the movies whose date text mentions the current or previous month and whose
// score is above the threshold, and persists them with save.
func init() {
	InTheatersSpider = spider.Get("http://www.metacritic.com/browse/movies/release-date/theaters/date", func(ctx *spider.Context) error {
		fmt.Print(time.Now())
		fmt.Println("InTheatersSpider")

		if _, err := ctx.DoRequest(); err != nil {
			return err
		}
		htmlparser, err := ctx.HTMLParser()
		if err != nil {
			return err
		}

		// Only movies scoring strictly above this value are kept.
		const minScore = 60

		// The month filter is the same for every entry, so compute it once
		// per run instead of once per scraped movie.
		curMonth, lastMonth := inTheatersMonths(time.Now())

		var p InTheatersGroup
		// Start from an empty, non-nil slice so that a run with no matches
		// serializes as an empty list rather than null.
		p.Data = []Movie{}

		htmlparser.Find(`div[class="product_wrap group_product_wrap"]`).Each(func(_ int, s *goquery.Selection) {
			title := strings.TrimSpace(s.Find("a").Text())
			date := strings.TrimSpace(s.Find(`span[class="data"]`).Text())

			score, err := strconv.Atoi(strings.TrimSpace(s.Find(`div[class*="metascore_w"]`).Text()))
			if err != nil {
				// No numeric score on this entry; it cannot pass the
				// threshold, so skip it.
				return
			}
			if score <= minScore {
				return
			}
			if !strings.Contains(date, curMonth) && !strings.Contains(date, lastMonth) {
				return
			}

			p.Data = append(p.Data, Movie{title, date, score, inTheatersShowtimesURL(title)})
		})

		// save reports an error if the database has not been opened; opening
		// and closing it is left to the caller.
		if err := p.save(); err != nil {
			return fmt.Errorf("error saving InTheatersGroup: %v", err)
		}
		return nil
	})
}