/
filters.go
111 lines (85 loc) · 1.82 KB
/
filters.go
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
package html2
import (
	"image"
	"io"
	"net/url"
	"path"
	"strings"
)
// findFilters registers f in filters under every filter role it satisfies.
//
// f is tested independently against URLFilter, FileFilter and FollowRule, so
// a single value that satisfies several of them is appended to each matching
// list. A value that satisfies none of them leaves filters untouched.
func findFilters(f interface{}, filters *Filters) {
	if asURL, isURL := f.(URLFilter); isURL {
		filters.URLFilters = append(filters.URLFilters, asURL)
	}

	if asFile, isFile := f.(FileFilter); isFile {
		filters.FileFilters = append(filters.FileFilters, asFile)
	}

	if asRule, isRule := f.(FollowRule); isRule {
		filters.FollowRules = append(filters.FollowRules, asRule)
	}
}
// SizeFilter is a file filter that keeps only downloads whose buffered
// length lies within [Minimum, Maximum], both bounds inclusive.
//
// Note that the zero value has Maximum == 0 and therefore rejects every
// non-empty download.
type SizeFilter struct {
	Minimum int
	Maximum int
}

// FilterFile reports whether download's buffered length is at least
// sf.Minimum and at most sf.Maximum.
// NOTE(review): the length is presumably a byte count — confirm against the
// Download type.
func (sf *SizeFilter) FilterFile(download *Download) bool {
	return download.bytes.Len() >= sf.Minimum && download.bytes.Len() <= sf.Maximum
}
// PageExtensionFilter is a URL filter that accepts or rejects URLs based on
// the file extension of their path.
//
// Extensions lists the extensions to match. Entries may be written with or
// without the leading dot ("html" or ".html") and are compared
// case-insensitively. An empty entry matches URLs whose path has no
// extension.
//
// Allowed selects the mode: when true, only URLs with a listed extension
// pass; when false, URLs with a listed extension are rejected and all others
// pass.
type PageExtensionFilter struct {
	Extensions []string
	Allowed    bool
}

// FilterURL reports whether url passes the extension filter. parent is not
// consulted.
func (pef *PageExtensionFilter) FilterURL(url, parent *url.URL) bool {
	// The extension belongs to the path component (".../index.html"), not to
	// the fragment (the part after '#'), which names an anchor in the page.
	got := strings.TrimPrefix(path.Ext(url.Path), ".")

	for _, ext := range pef.Extensions {
		if strings.EqualFold(got, strings.TrimPrefix(ext, ".")) {
			return pef.Allowed
		}
	}
	return !pef.Allowed
}
// KeywordRuleFilter is a URL filter driven by case-insensitive substring
// matches against the full URL string.
//
// Blacklist terms always win: a URL containing any of them is rejected.
// When Whitelist is non-empty, a URL must additionally contain at least one
// whitelisted term to pass. With both lists empty every URL passes.
type KeywordRuleFilter struct {
	Whitelist []string
	Blacklist []string
}

// FilterURL reports whether url satisfies the keyword rules. parent is not
// consulted.
func (kw *KeywordRuleFilter) FilterURL(url, parent *url.URL) bool {
	haystack := strings.ToUpper(url.String())
	mentions := func(term string) bool {
		return strings.Contains(haystack, strings.ToUpper(term))
	}

	// The blacklist is checked first so it takes precedence over the whitelist.
	for i := range kw.Blacklist {
		if mentions(kw.Blacklist[i]) {
			return false
		}
	}

	for i := range kw.Whitelist {
		if mentions(kw.Whitelist[i]) {
			return true
		}
	}

	// Nothing whitelisted matched: pass only if there was no whitelist at all.
	return len(kw.Whitelist) == 0
}
// NoCrossSiteCrawl is a URL filter that confines a crawl to the site it
// started on: a URL passes only when its host matches the host of the page
// it was found on.
type NoCrossSiteCrawl struct {
}

// FilterURL reports whether url has the same Host as parent. Both arguments
// must be non-nil.
//
// The comparison ignores letter case, because host names are
// case-insensitive and net/url does not normalise their case when parsing.
// Host includes the port when one is present, so URLs on different ports of
// the same machine are still treated as different sites.
func (ncsc *NoCrossSiteCrawl) FilterURL(url, parent *url.URL) bool {
	return strings.EqualFold(url.Host, parent.Host)
}
// getImageDimension decodes the image configuration from r and returns the
// image's width and height.
//
// Decoding is delegated to image.DecodeConfig, which only recognises formats
// that have been registered with the image package (typically by importing
// the format package, e.g. image/png, for its side effects). On failure the
// dimensions are reported as -1, -1 together with the decoding error.
func getImageDimension(r io.Reader) (int, int, error) {
	cfg, _, err := image.DecodeConfig(r)
	if err == nil {
		return cfg.Width, cfg.Height, nil
	}
	return -1, -1, err
}