/*
Author: rodr <rodr<at>dpustudios.com>

Binary 2kcookies implements a simple web scraper and DynamoDB populator for 2k.com cookies and CSRF state tokens.

The intent of this code is not to be a particularly fast or efficient scraper; it is instead to politely store tens of thousands of operational-security data points from a given website.

The author is not responsible for your use of this software, and reminds you to be nice to others.
*/
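
// Usage sketch (flags as defined in main below; assumes AWS credentials are
// already configured and a DynamoDB table named "2kcookies" exists in
// us-east-1):
//
//	go run 2kcookies.go -count 100 -sleep 2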
package main

import (
	"flag"
	"fmt"
	"io"
	"net/http"
	"net/url"
	"strconv"
	"time"

	"github.com/aws/aws-sdk-go/aws"
	"github.com/aws/aws-sdk-go/aws/session"
	"github.com/aws/aws-sdk-go/service/dynamodb"
	"golang.org/x/net/html"
)

const (
	table_name   = "2kcookies"
	timeLongForm = "Mon, 2 Jan 2006 15:04:05 GMT"
)
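
// Note: timeLongForm follows Go's reference-time convention, where the layout
// string is written in terms of the fixed moment Mon Jan 2 15:04:05 MST 2006.
// For example, time.Parse(timeLongForm, "Tue, 10 May 2016 04:05:06 GMT")
// yields that instant as a time.Time.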

// CookieManager is a simple container for our cookie jar.
type CookieManager struct {
	jar map[string][]*http.Cookie
}

// CookieResult is currently unused.
type CookieResult struct {
	Name  string
	Value string
	Time  int64
}

// SetCookies implements the cookie-storing side of http.CookieJar; net/http
// calls it to record response cookies.
func (p *CookieManager) SetCookies(u *url.URL, cookies []*http.Cookie) {
	p.jar[u.Host] = cookies
}

// Cookies returns the cookies for a given URL.
func (p *CookieManager) Cookies(u *url.URL) []*http.Cookie {
	return p.jar[u.Host]
}
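
// Because CookieManager implements both methods of http.CookieJar, it can be
// attached directly to an http.Client. A minimal sketch (hypothetical usage,
// separate from the program flow below):
//
//	jar := &CookieManager{jar: make(map[string][]*http.Cookie)}
//	client := &http.Client{Jar: jar}
//	client.Get("http://2k.com") // response cookies land in jar.jar["2k.com"]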

// GetCookie performs a GET against 2k.com, storing the cookies in the given
// CookieManager, and returns the response.
func GetCookie(j *CookieManager) (*http.Response, error) {
	client := &http.Client{Jar: j}
	req, err := http.NewRequest("GET", "http://2k.com", nil)
	if err != nil {
		fmt.Println("We have an error building the request")
		return nil, err
	}
	resp, err := client.Do(req)
	if err != nil {
		fmt.Println("We have an error executing the request")
		return nil, err
	}
	return resp, nil
}

// InputItem is the final object we pass to ProcessCookies.
type InputItem struct {
	CookieManager
	Time      string
	TableName string
	HrefData  map[string]string
}

// ProcessCookies parses the 2kgames cookies, extracting the necessary values:
// given an InputItem it returns a dynamodb.PutItemInput, or nil if no "2K"
// cookie was found.
func ProcessCookies(inputItem *InputItem) *dynamodb.PutItemInput {
	var params *dynamodb.PutItemInput
	for _, item := range inputItem.CookieManager.jar {
		for _, cookie := range item {
			if cookie.Name != "2K" {
				continue
			}
			state := inputItem.HrefData["state"]
			client_id := inputItem.HrefData["client_id"]
			params = &dynamodb.PutItemInput{
				TableName: &inputItem.TableName,
				Item: map[string]*dynamodb.AttributeValue{
					"timestamp": {
						N: &inputItem.Time,
					},
					"cookie_name": {
						S: &cookie.Name,
					},
					"cookie_value": {
						S: &cookie.Value,
					},
					"state": {
						S: &state,
					},
					"client_id": {
						S: &client_id,
					},
				},
			} // params end
		}
	}
	return params
}
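
// The resulting DynamoDB item looks roughly like this (values illustrative,
// not real captures):
//
//	{
//	  "timestamp":    {"N": "1462852800"},
//	  "cookie_name":  {"S": "2K"},
//	  "cookie_value": {"S": "..."},
//	  "state":        {"S": "..."},
//	  "client_id":    {"S": "..."}
//	}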

// scrapePageWorker does most of the work of parsing the HTML: it walks the
// token stream and emits (id, data-href) pairs for each anchor tag.
func scrapePageWorker(page *io.ReadCloser, out chan [2]string, chFinished chan bool) {
	defer func() {
		chFinished <- true
	}()
	z := html.NewTokenizer(*page)
	// loop until the tokenizer is exhausted, tossing state tokens into a url map
	for {
		var result [2]string
		switch z.Next() {
		case html.ErrorToken:
			return
		case html.StartTagToken:
			t := z.Token()
			if t.Data != "a" {
				continue
			}
			for _, attr := range t.Attr {
				if attr.Key == "id" {
					result[0] = attr.Val
				}
				if attr.Key == "data-href" {
					result[1] = attr.Val
					out <- result
				}
			}
		}
	}
}
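
// The worker expects anchors of roughly this shape (illustrative, not a real
// capture from 2k.com):
//
//	<a id="..." data-href="https://.../authorize?state=abc123&client_id=xyz">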

// ScrapePage grabs the values we care about from the 2kgames HTML itself,
// returning a map with "state" and "client_id" keys when found.
func ScrapePage(page *io.ReadCloser) map[string]string {
	out := make(chan [2]string)
	chFinished := make(chan bool)
	go scrapePageWorker(page, out, chFinished)
	urls := make(map[string]string)
	// iterate through channel results; return once the worker signals it is done
	for {
		select {
		case u := <-out:
			actual_url, err := url.Parse(u[1])
			if err != nil {
				continue
			}
			url_vals := actual_url.Query()
			// set the values we care about ("" if the parameter is absent)
			urls["state"] = url_vals.Get("state")
			urls["client_id"] = url_vals.Get("client_id")
		case <-chFinished:
			return urls
		}
	}
}

func main() {
	svc := dynamodb.New(session.New(&aws.Config{Region: aws.String("us-east-1")}))
	jar := &CookieManager{}
	var cookieCount int
	var sleepTime int64
	flag.IntVar(&cookieCount, "count", 10, "collect this many cookies")
	flag.Int64Var(&sleepTime, "sleep", 2, "sleep this many seconds between requests")
	flag.Parse()
	for i := 0; i < cookieCount; i++ {
		jar.jar = make(map[string][]*http.Cookie)
		if resp, err := GetCookie(jar); err == nil {
			t, _ := time.Parse(timeLongForm, resp.Header.Get("Date"))
			time_string := strconv.FormatInt(t.Unix(), 10)
			body := resp.Body
			params := ProcessCookies(&InputItem{*jar, time_string, table_name, ScrapePage(&body)})
			resp.Body.Close()
			if params != nil {
				if _, err := svc.PutItem(params); err != nil {
					fmt.Println("Failed to write item to DynamoDB:", err)
				}
			}
		} else {
			fmt.Println("Failed to get a response body. Will retry after timeout.")
		}
		if i%5 == 0 && i != 0 {
			fmt.Printf("Got %d cookies.\n", i)
		}
		time.Sleep(time.Duration(sleepTime) * time.Second) // hold at 2s by default, for niceness
	}
}