Code Example #1
File: response.go Project: moomerman/whois
// DetectCharset returns the best guess for the response body character set.
func (res *Response) DetectCharset() {
	// Detect via BOM / HTML meta tag
	_, cs1, ok1 := charset.DetermineEncoding(res.Body, res.MediaType)

	// Detect via ICU
	cs2, ok2, html := "", false, false
	var det *chardet.Detector
	if strings.Contains(res.MediaType, "html") || true { // "|| true" forces the HTML detector for every media type
		det = chardet.NewHtmlDetector()
		html = true
	} else {
		det = chardet.NewTextDetector()
	}
	r, err := det.DetectAll(res.Body)
	if err == nil && len(r) > 0 {
		cs2 = strings.ToLower(r[0].Charset)
		ok2 = r[0].Confidence > 50
	}

	// Prefer the charset-package guess when ICU is not confident and that guess is certain or the body is HTML; otherwise use ICU's guess.
	if !ok2 && (ok1 || html) {
		res.Charset = cs1
	} else {
		res.Charset = cs2
	}

	// fmt.Printf("Detected charset via go.net/html/charset: %s (%t)\n", cs1, ok1)
	// fmt.Printf("Detected charset via saintfish/chardet:   %s (%d)\n", cs2, r[0].Confidence)
}
Code Example #2
File: net.go Project: yetist/xmppbot
func GetUTF8HtmlTitle(str string) string {
	var e encoding.Encoding
	var name string

	e, name, _ = charset.DetermineEncoding([]byte(str), "text/html")
	if name == "windows-1252" {
		e, name, _ = charset.DetermineEncoding([]byte(str), "text/html;charset=gbk")
	}
	r := transform.NewReader(strings.NewReader(str), e.NewDecoder())
	b, err := ioutil.ReadAll(r)
	if err != nil {
		return ""
	}
	return getHtmlTitle(string(b))
}
Code Example #3
File: fetch.go Project: Yellow79/sandblast
// DecodedBody returns the body of resp decoded to UTF-8, detecting its encoding.
func DecodedBody(resp *http.Response) (content []byte, encoding string, err error) {
	defer resp.Body.Close()
	body, err := ioutil.ReadAll(resp.Body)
	if err != nil && err != io.EOF {
		content = body
		return
	}
	e, encoding, _ := charset.DetermineEncoding(body, resp.Header.Get("Content-Type"))
	t := e.NewDecoder()
	content = make([]byte, len(body))
	start := 0
	for {
		var nDst, nSrc int
		nDst, nSrc, err = t.Transform(content[start:], body, true)
		body = body[nSrc:]
		start += nDst
		switch err {
		case transform.ErrShortDst:
			newContent := make([]byte, len(content)*2)
			copy(newContent, content)
			content = newContent
		case transform.ErrShortSrc:
			return
		default:
			content = content[:start]
			return
		}
	}
}
Code Example #4
File: body_parser.go Project: ReanGD/go-web-search
func bodyToUTF8(body []byte, contentType string) (*transform.Reader, error) {
	enc, _, _ := charset.DetermineEncoding(body, contentType)
	if enc == encoding.Nop {
		return nil, werrors.New(ErrEncodingNotFound)
	}

	return transform.NewReader(bytes.NewReader(body), enc.NewDecoder()), nil
}
Code Example #5
File: type.go Project: otiai10/goquery
func parseReader(r io.Reader, url *url.URL) (*Document, error) {
	b, _ := ioutil.ReadAll(r)
	enc, _, _ := charset.DetermineEncoding(b, "text/html")
	root, e := html.Parse(bytes.NewReader(b))
	if e != nil {
		return nil, e
	}
	return newDocument(root, url, enc.NewDecoder()), nil
}
Code Example #6
File: encoding.go Project: jpfielding/gorets
// ReEncodeReader re-encodes a reader based on the http content type provided
func ReEncodeReader(input io.ReadCloser, contentType string) io.ReadCloser {
	if e, _, _ := charset.DetermineEncoding([]byte{}, contentType); e != encoding.Nop {
		type closer struct {
			io.Reader
			io.Closer
		}
		tr := transform.NewReader(input, e.NewDecoder())
		return closer{tr, input}
	}
	return input
}
Code Example #7
File: mail.go Project: kaey/mail
// decodeCharset detects the charset of str and decodes it.
func decodeCharset(str, label string) (nstr string, err error) {
	enc, _ := charset.Lookup(label)
	if enc == nil {
		enc, _, _ = charset.DetermineEncoding([]byte(str), "text/plain")
	}

	nstr, _, err = transform.String(enc.NewDecoder(), str)
	if err != nil {
		return nstr, err
	}

	return stripNonUTF8(nstr), nil
}
Code Example #8
func main() {
	content := Gethtml("http://www.jb51.net/")
	file.WriteStringToFile(string(content), "./gbk.html")
	//content := Gethtml("http://www.baidu.com")
	e, n, c := charset.DetermineEncoding(content, "text/html")
	println(e)
	println(n)
	println(c)
	if n != "utf-8" {
		if e != nil {
			s, err := transformString(e.NewDecoder(), string(content))
			if err == nil {
				file.WriteStringToFile(s, "./out.html")
			}
		}
	}
}
Code Example #9
File: fetch.go Project: fanyang01/crawler
func (r *Response) convToUTF8(preview []byte, query func(*url.URL) string) {
	// Convert to UTF-8
	if media.IsHTML(r.ContentType) {
		e, name, certain := charset.DetermineEncoding(
			preview, r.ContentType,
		)
		// According to the charset package source, the default for an unknown charset is windows-1252.
		if !certain && name == "windows-1252" {
			if e, name = charset.Lookup(query(r.URL)); e != nil {
				certain = true
			}
		}
		r.Charset, r.CertainCharset, r.Encoding = name, certain, e
		if name != "" && e != nil {
			r.Body, _ = util.NewUTF8Reader(name, r.Body)
		}
	}
}
Code Example #10
File: utils.go Project: kissthink/goread
func encodingReader(body []byte, contentType string) (encoding.Encoding, error) {
	preview := make([]byte, 1024)
	var r io.Reader = bytes.NewReader(body)
	n, err := io.ReadFull(r, preview)
	switch {
	case err == io.ErrUnexpectedEOF:
		preview = preview[:n]
		r = bytes.NewReader(preview)
	case err != nil:
		return nil, err
	default:
		r = io.MultiReader(bytes.NewReader(preview), r)
	}

	e, _, certain := charset.DetermineEncoding(preview, contentType)
	if !certain && e == charmap.Windows1252 && utf8.Valid(body) {
		e = encoding.Nop
	}
	return e, nil
}
Code Example #11
File: iteration.go Project: anxiousmodernman/cli
func readFileAsUTF8String(filename string) (*string, error) {
	b, err := ioutil.ReadFile(filename)
	if err != nil {
		return nil, err
	}

	encoding, _, _ := charset.DetermineEncoding(b, mimeType)
	decoder := encoding.NewDecoder()
	decodedBytes, _, err := transform.Bytes(decoder, b)
	if err != nil {
		return nil, err
	}

	// Drop the UTF-8 BOM that may have been added. This isn't necessary, and
	// it's going to be written into another UTF-8 buffer anyway once it's JSON
	// serialized.
	//
	// The standard recommends omitting the BOM. See
	// http://www.unicode.org/versions/Unicode5.0.0/ch02.pdf
	decodedBytes = bytes.TrimPrefix(decodedBytes, utf8BOM)

	s := string(decodedBytes)
	return &s, nil
}
Code Example #12
File: qa.go Project: Yellow79/sandblast
func extractTest(test test, writeextract bool) ([]byte, string) {
	in, err := test.input.Open()
	must(err)
	defer in.Close()

	body, err := ioutil.ReadAll(in)
	must(err)

	e, _, _ := charset.DetermineEncoding(body, "UTF-8")
	r := transform.NewReader(bytes.NewReader(body), e.NewDecoder())
	node, err := html.Parse(r)
	must(err)

	_, output, simplified, flattened, cleaned, err := sandblast.ExtractEx(node, 0)
	must(err)

	if writeextract {
		fmt.Printf("SIMPLIFIED:\n%s\n", simplified.DebugString())
		fmt.Printf("FLATTENED:\n%s\n", flattened.DebugString())
		fmt.Printf("CLEANED:\n%s\n", cleaned.DebugString())
	}

	return body, output
}
Code Example #13
File: api.go Project: pawelszydlo/papa-bot
// GetPageBody gets and returns a body of a page.
func (bot *Bot) GetPageBody(urlinfo *UrlInfo, customHeaders map[string]string) error {
	if urlinfo.URL == "" {
		return errors.New("Empty URL")
	}
	// Build the request.
	req, err := http.NewRequest("GET", urlinfo.URL, nil)
	if err != nil {
		return err
	}
	if customHeaders["User-Agent"] == "" {
		customHeaders["User-Agent"] = bot.Config.HttpDefaultUserAgent
	}
	for k, v := range customHeaders {
		req.Header.Set(k, v)
	}

	// Get response.
	resp, err := bot.HTTPClient.Do(req)
	if err != nil {
		return err
	}
	defer resp.Body.Close()

	// Update the URL if it changed after redirects.
	final_link := resp.Request.URL.String()
	if final_link != "" && final_link != urlinfo.URL {
		bot.Log.Debugf("%s becomes %s", urlinfo.URL, final_link)
		urlinfo.URL = final_link
	}

	// Load the body up to PageBodyMaxSize.
	body := make([]byte, bot.Config.PageBodyMaxSize, bot.Config.PageBodyMaxSize)
	if num, err := io.ReadFull(resp.Body, body); err != nil && err != io.ErrUnexpectedEOF {
		return err
	} else {
		// Trim unneeded 0 bytes so that JSON unmarshaller won't complain.
		body = body[:num]
	}
	// Get the content-type
	contentType := resp.Header.Get("Content-Type")
	if contentType == "" {
		contentType = http.DetectContentType(body)
	}
	urlinfo.ContentType = contentType

	// If type is text, decode the body to UTF-8.
	if strings.Contains(contentType, "text/") {
		// Try to get more significant part for encoding detection.
		sample := bytes.Join(bot.webContentSampleRe.FindAll(body, -1), []byte{})
		if len(sample) < 100 {
			sample = body
		}
		// Unescape HTML tokens.
		sample = []byte(html.UnescapeString(string(sample)))
		// Try to only get charset from content type. Needed because some pages serve broken Content-Type header.
		detectionContentType := contentType
		tokens := strings.Split(contentType, ";")
		for _, t := range tokens {
			if strings.Contains(strings.ToLower(t), "charset") {
				detectionContentType = "text/plain; " + t
				break
			}
		}
		// Detect encoding and transform.
		encoding, _, _ := charset.DetermineEncoding(sample, detectionContentType)
		decodedBody, _, _ := transform.Bytes(encoding.NewDecoder(), body)
		urlinfo.Body = decodedBody
	} else if strings.Contains(contentType, "application/json") {
		urlinfo.Body = body
	} else {
		bot.Log.Debugf("Not fetching the body for Content-Type: %s", contentType)
	}
	return nil
}
Code Example #14
File: proxy.go Project: vishnuvaradaraj/redwood
func (h ProxyHandler) ServeHTTP(w http.ResponseWriter, r *http.Request) {
	activeConnections.Add(1)
	defer activeConnections.Done()

	conf := GetConfig()

	if !conf.ACLsLoaded {
		http.Error(w, "Redwood proxy configuration needs to be updated for this version of Redwood.\n(Use ACLs)", 500)
		return
	}

	if len(r.URL.String()) > 10000 {
		http.Error(w, "URL too long", http.StatusRequestURITooLong)
		return
	}

	client := r.RemoteAddr
	host, _, err := net.SplitHostPort(client)
	if err == nil {
		client = host
	}

	if conf.AuthCacheTime > 0 {
		auth := r.Header.Get("Proxy-Authorization")
		if auth == "" {
			authCacheLock.RLock()
			ar, ok := authCache[client]
			authCacheLock.RUnlock()
			if ok && time.Now().Sub(ar.Time) < time.Duration(conf.AuthCacheTime)*time.Second {
				r.Header.Set("Proxy-Authorization", ar.ProxyAuthorization)
			}
		} else {
			authCacheLock.Lock()
			authCache[client] = authRecord{
				ProxyAuthorization: auth,
				Time:               time.Now(),
			}
			authCacheLock.Unlock()
		}
	}

	if r.Header.Get("Proxy-Authorization") != "" {
		user, pass := ProxyCredentials(r)
		if !conf.ValidCredentials(user, pass) {
			log.Printf("Incorrect username or password from %v: %q:%q", r.RemoteAddr, user, pass)
			r.Header.Del("Proxy-Authorization")
		}
	}

	// Reconstruct the URL if it is incomplete (i.e. on a transparent proxy).
	if r.URL.Host == "" {
		r.URL.Host = r.Host
	}
	if r.URL.Scheme == "" {
		if h.TLS {
			r.URL.Scheme = "https"
		} else {
			r.URL.Scheme = "http"
		}
	}

	var userAgent string
	if conf.LogUserAgent {
		userAgent = r.Header.Get("User-Agent")
	}

	if realHost, ok := conf.VirtualHosts[r.Host]; ok {
		r.Host = realHost
		r.URL.Host = realHost
	}

	user := client
	var authUser string
	if h.user != "" {
		authUser = h.user
	} else if u, _ := ProxyCredentials(r); u != "" {
		authUser = u
	}
	if authUser != "" {
		user = authUser
	}

	tally := conf.URLRules.MatchingRules(r.URL)
	scores := conf.categoryScores(tally)
	categories := conf.significantCategories(scores)

	reqACLs := conf.ACLs.requestACLs(r, authUser)

	possibleActions := []string{
		"allow",
		"block",
		"block-invisible",
	}
	if r.Header.Get("Proxy-Authorization") == "" && !h.TLS {
		possibleActions = append(possibleActions, "require-auth")
	}
	if r.Method == "CONNECT" && conf.TLSReady {
		possibleActions = append(possibleActions, "ssl-bump")
	}

	thisRule, ignored := conf.ChooseACLCategoryAction(reqACLs, categories, possibleActions...)
	if r.Method == "CONNECT" && conf.TLSReady && thisRule.Action == "" {
		// If the result is unclear, go ahead and start to bump the connection.
		// The ACLs will be checked one more time anyway.
		thisRule.Action = "ssl-bump"
	}

	switch thisRule.Action {
	case "require-auth":
		conf.send407(w)
		log.Printf("Missing required proxy authentication from %v to %v", r.RemoteAddr, r.URL)
		return
	case "block":
		conf.showBlockPage(w, r, user, tally, scores, thisRule)
		logAccess(r, nil, 0, false, user, tally, scores, thisRule, "", ignored, userAgent)
		return
	case "block-invisible":
		showInvisibleBlock(w)
		logAccess(r, nil, 0, false, user, tally, scores, thisRule, "", ignored, userAgent)
		return
	case "ssl-bump":
		conn, err := newHijackedConn(w)
		if err != nil {
			fmt.Fprintln(conn, "HTTP/1.1 500 Internal Server Error")
			fmt.Fprintln(conn)
			fmt.Fprintln(conn, err)
			conn.Close()
			return
		}
		fmt.Fprint(conn, "HTTP/1.1 200 Connection Established\r\n\r\n")
		SSLBump(conn, r.URL.Host, user, authUser)
		return
	}

	if r.Host == localServer {
		conf.ServeMux.ServeHTTP(w, r)
		return
	}

	if r.Method == "CONNECT" {
		conn, err := newHijackedConn(w)
		if err != nil {
			fmt.Fprintln(conn, "HTTP/1.1 500 Internal Server Error")
			fmt.Fprintln(conn)
			fmt.Fprintln(conn, err)
			conn.Close()
			return
		}
		fmt.Fprint(conn, "HTTP/1.1 200 Connection Established\r\n\r\n")
		logAccess(r, nil, 0, false, user, tally, scores, thisRule, "", ignored, userAgent)
		connectDirect(conn, r.URL.Host, nil)
		return
	}

	if r.Header.Get("Upgrade") == "websocket" {
		h.makeWebsocketConnection(w, r)
		return
	}

	r.Header.Add("Via", r.Proto+" Redwood")
	r.Header.Add("X-Forwarded-For", client)

	gzipOK := !conf.DisableGZIP && strings.Contains(r.Header.Get("Accept-Encoding"), "gzip") && !lanAddress(client)
	r.Header.Del("Accept-Encoding")

	urlChanged := conf.changeQuery(r.URL)

	if !urlChanged {
		// Rebuild the URL in a way that will preserve which characters are escaped
		// and which aren't, for compatibility with broken servers.
		rawURL := r.RequestURI
		if strings.HasPrefix(rawURL, r.URL.Scheme) {
			rawURL = rawURL[len(r.URL.Scheme):]
			rawURL = strings.TrimPrefix(rawURL, "://")
			slash := strings.Index(rawURL, "/")
			if slash == -1 {
				rawURL = "/"
			} else {
				rawURL = rawURL[slash:]
			}
		}
		q := strings.Index(rawURL, "?")
		if q != -1 {
			rawURL = rawURL[:q]
		}
		if strings.HasPrefix(rawURL, "//") {
			// The path should start with a single slash not two.
			rawURL = rawURL[1:]
		}
		r.URL.Opaque = rawURL
	}

	proxied := false
	var rt http.RoundTripper
	if h.rt == nil {
		if r.URL.Opaque != "" && transport.Proxy != nil {
			if p, _ := transport.Proxy(r); p != nil {
				// If the request is going through a proxy, the host needs to be
				// included in the opaque element.
				r.URL.Opaque = "//" + r.URL.Host + r.URL.Opaque
				proxied = true
			}
		}
		rt = &transport
	} else {
		rt = h.rt
	}

	if !proxied {
		r.Header.Del("Proxy-Authorization")
	}
	resp, err := rt.RoundTrip(r)
	r.URL.Opaque = ""

	if err != nil {
		http.Error(w, err.Error(), http.StatusServiceUnavailable)
		log.Printf("error fetching %s: %s", r.URL, err)
		logAccess(r, nil, 0, false, user, tally, scores, thisRule, "", ignored, userAgent)
		return
	}
	defer resp.Body.Close()

	// Prevent switching to QUIC.
	resp.Header.Del("Alternate-Protocol")

	originalContentType := resp.Header.Get("Content-Type")
	fixContentType(resp)

	respACLs := conf.ACLs.responseACLs(resp)
	acls := unionACLSets(reqACLs, respACLs)
	thisRule, ignored = conf.ChooseACLCategoryAction(acls, categories, "allow", "block", "block-invisible", "hash-image", "phrase-scan")
	if thisRule.Action == "" {
		thisRule.Action = "allow"
	}

	switch thisRule.Action {
	case "allow":
		resp.Header.Set("Content-Type", originalContentType)
		copyResponseHeader(w, resp)
		n, err := io.Copy(w, resp.Body)
		if err != nil {
			log.Printf("error while copying response (URL: %s): %s", r.URL, err)
		}
		logAccess(r, resp, int(n), false, user, tally, scores, thisRule, "", ignored, userAgent)
		return
	case "block":
		conf.showBlockPage(w, r, user, tally, scores, thisRule)
		logAccess(r, resp, 0, false, user, tally, scores, thisRule, "", ignored, userAgent)
		return
	case "block-invisible":
		showInvisibleBlock(w)
		logAccess(r, resp, 0, false, user, tally, scores, thisRule, "", ignored, userAgent)
		return
	}

	lr := &io.LimitedReader{
		R: resp.Body,
		N: 1e6,
	}
	content, err := ioutil.ReadAll(lr)
	if err != nil {
		log.Printf("error while reading response body (URL: %s): %s", r.URL, err)
	}
	if lr.N == 0 {
		log.Println("response body too long to filter:", r.URL)
		resp.Header.Set("Content-Type", originalContentType)
		var dest io.Writer = w
		if gzipOK {
			resp.Header.Set("Content-Encoding", "gzip")
			resp.Header.Del("Content-Length")
			gzw := gzip.NewWriter(w)
			defer gzw.Close()
			dest = gzw
		}
		copyResponseHeader(w, resp)
		dest.Write(content)
		n, err := io.Copy(dest, resp.Body)
		if err != nil {
			log.Printf("error while copying response (URL: %s): %s", r.URL, err)
		}
		logAccess(r, resp, int(n)+len(content), false, user, tally, scores, ACLActionRule{Action: "allow", Needed: []string{"too-long-to-filter"}}, "", ignored, userAgent)
		return
	}

	modified := false
	pageTitle := ""

	switch thisRule.Action {
	case "phrase-scan":
		contentType := resp.Header.Get("Content-Type")
		_, cs, _ := charset.DetermineEncoding(content, contentType)
		if strings.Contains(contentType, "html") {
			var doc *html.Node
			if conf.LogTitle {
				doc, err = parseHTML(content, cs)
				if err != nil {
					log.Printf("Error parsing HTML from %s: %s", r.URL, err)
				} else {
					t := titleSelector.MatchFirst(doc)
					if t != nil {
						if titleText := t.FirstChild; titleText != nil && titleText.Type == html.TextNode {
							pageTitle = titleText.Data
						}
					}
				}
			}

			modified = conf.pruneContent(r.URL, &content, cs, acls, doc)
			if modified {
				resp.Header.Set("Content-Type", "text/html; charset=utf-8")
				cs = "utf-8"
				resp.Header.Del("Content-Length")
			}
		}

		conf.scanContent(content, contentType, cs, tally)

	case "hash-image":
		img, _, err := image.Decode(bytes.NewReader(content))
		if err != nil {
			log.Printf("Error decoding image from %v: %v", r.URL, err)
			break
		}
		hash := dhash.New(img)

		for _, h := range conf.ImageHashes {
			if dhash.Distance(hash, h) <= conf.DhashThreshold {
				tally[rule{imageHash, h.String()}]++
			}
		}
	}

	scores = conf.categoryScores(tally)
	categories = conf.significantCategories(scores)
	thisRule, ignored = conf.ChooseACLCategoryAction(acls, categories, "allow", "block", "block-invisible")
	if thisRule.Action == "" {
		thisRule.Action = "allow"
	}

	switch thisRule.Action {
	case "block":
		conf.showBlockPage(w, r, user, tally, scores, thisRule)
		logAccess(r, resp, len(content), modified, user, tally, scores, thisRule, pageTitle, ignored, userAgent)
		return
	case "block-invisible":
		showInvisibleBlock(w)
		logAccess(r, resp, len(content), modified, user, tally, scores, thisRule, pageTitle, ignored, userAgent)
		return
	}

	if !modified {
		resp.Header.Set("Content-Type", originalContentType)
	}

	if gzipOK && len(content) > 1000 {
		resp.Header.Set("Content-Encoding", "gzip")
		resp.Header.Del("Content-Length")
		copyResponseHeader(w, resp)
		gzw := gzip.NewWriter(w)
		gzw.Write(content)
		gzw.Close()
	} else {
		copyResponseHeader(w, resp)
		w.Write(content)
	}

	logAccess(r, resp, len(content), modified, user, tally, scores, thisRule, pageTitle, ignored, userAgent)
}
Code Example #15
File: testmode.go Project: soccerties/redwood
// runURLTest prints debugging information about how the URL and its content would be rated.
func runURLTest(u string) {
	conf := getConfig()

	URL, err := url.Parse(u)
	if err != nil {
		fmt.Println("Could not parse the URL.")
		return
	}

	if URL.Scheme == "" {
		url2, err := url.Parse("http://" + u)
		if err == nil {
			URL = url2
		}
	}

	fmt.Println("URL:", URL)
	fmt.Println()

	tally := conf.URLRules.MatchingRules(URL)
	scores := conf.categoryScores(tally)
	categories := conf.significantCategories(scores)

	if len(tally) == 0 {
		fmt.Println("No URL rules match.")
	} else {
		fmt.Println("The following URL rules match:")
		for s := range tally {
			fmt.Println(s)
		}
	}

	if len(scores) > 0 {
		fmt.Println()
		fmt.Println("The request has the following category scores:")
		printSortedTally(scores)
	}

	req := &http.Request{
		Method: "GET",
		URL:    URL,
		Header: make(http.Header),
	}
	reqACLs := conf.ACLs.requestACLs(req, "")
	if len(reqACLs) > 0 {
		fmt.Println()
		fmt.Println("The request matches the following ACLs:")
		for acl := range reqACLs {
			fmt.Println(acl)
		}
	}

	thisRule, ignored := conf.ChooseACLCategoryAction(reqACLs, categories, "allow", "block", "block-invisible")
	fmt.Println()
	if thisRule.Action == "" {
		fmt.Println("No ACL rule was triggered.")
	} else {
		fmt.Println("Triggered rule:", thisRule.Action, thisRule.Conditions())
		if len(ignored) > 0 {
			fmt.Println("Ignored categories:", strings.Join(ignored, ", "))
		}
	}

	if conf.changeQuery(URL) {
		fmt.Println()
		fmt.Println("URL modified to:", URL)
	}

	fmt.Println()
	fmt.Println("Downloading content...")
	resp, err := http.DefaultTransport.RoundTrip(req)
	if err != nil {
		fmt.Println(err)
		return
	}
	defer resp.Body.Close()

	fmt.Println(resp.Status)
	fmt.Println()

	fixContentType(resp)
	respACLs := conf.ACLs.responseACLs(resp)
	acls := unionACLSets(reqACLs, respACLs)

	if len(respACLs) > 0 {
		fmt.Println("The response matches the following ACLs:")
		for acl := range respACLs {
			fmt.Println(acl)
		}
		fmt.Println()
	}

	thisRule, ignored = conf.ChooseACLCategoryAction(acls, categories, "allow", "block", "block-invisible", "hash-image", "phrase-scan")

	if thisRule.Action == "" {
		fmt.Println("No ACL rule was triggered.")
	} else {
		fmt.Println("Triggered rule:", thisRule.Action, thisRule.Conditions())
		if len(ignored) > 0 {
			fmt.Println("Ignored categories:", strings.Join(ignored, ", "))
		}
	}

	if thisRule.Action != "phrase-scan" && thisRule.Action != "hash-image" {
		return
	}
	fmt.Println()

	contentType := resp.Header.Get("Content-Type")

	content, err := ioutil.ReadAll(resp.Body)
	if err != nil {
		fmt.Println("Error while reading response body:", err)
		return
	}

	var doc *html.Node
	switch thisRule.Action {
	case "phrase-scan":
		modified := false
		_, cs, _ := charset.DetermineEncoding(content, resp.Header.Get("Content-Type"))
		if strings.Contains(contentType, "html") {
			modified = conf.pruneContent(URL, &content, cs, acls, &doc)
		}
		if modified {
			cs = "utf-8"
			fmt.Println("Performed content pruning.")
			fmt.Println()
		}

		conf.scanContent(content, contentType, cs, tally)
		if len(tally) == 0 {
			fmt.Println("No content phrases match.")
		} else {
			fmt.Println("The following rules match:")
			printSortedTally(stringTally(tally))
		}

	case "hash-image":
		img, _, err := image.Decode(bytes.NewReader(content))
		if err != nil {
			fmt.Printf("Error decoding image: %v\n", err)
			return
		}
		hash := dhash.New(img)
		fmt.Println("The image's hash is", hash)

		for _, h := range conf.ImageHashes {
			distance := dhash.Distance(hash, h.Hash)
			if distance <= h.Threshold || h.Threshold == -1 && distance <= conf.DhashThreshold {
				tally[rule{imageHash, h.String()}]++
				fmt.Printf("Matching image hash found: %v (%d bits difference)\n", h, distance)
			}
		}
	}

	scores = conf.categoryScores(tally)
	categories = conf.significantCategories(scores)

	if len(scores) > 0 {
		fmt.Println()
		fmt.Println("The response has the following category scores:")
		printSortedTally(scores)
	}
	fmt.Println()

	thisRule, ignored = conf.ChooseACLCategoryAction(acls, categories, "allow", "block", "block-invisible")

	if thisRule.Action == "" {
		fmt.Println("No ACL rule was triggered.")
	} else {
		fmt.Println("Triggered rule:", thisRule.Action, thisRule.Conditions())
		if len(ignored) > 0 {
			fmt.Println("Ignored categories:", strings.Join(ignored, ", "))
		}
	}
}
Code Example #16
File: tool.go Project: rothgar/gogs
func DetectEncoding(content []byte) string {
	_, name, _ := charset.DetermineEncoding(content, setting.Repository.AnsiCharset)
	return name
}
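All of the examples above follow the same basic pattern: sniff a prefix of the body with charset.DetermineEncoding (optionally guided by the Content-Type header), then wrap the data in the returned encoding's decoder via golang.org/x/text/transform. The sketch below condenses that pattern into one self-contained program; the helper name readBodyAsUTF8, the 1024-byte preview size, and the example URL are illustrative assumptions, not code taken from any project listed here.

package main

import (
	"bufio"
	"fmt"
	"io"
	"io/ioutil"
	"log"
	"net/http"

	"golang.org/x/net/html/charset"
	"golang.org/x/text/transform"
)

// readBodyAsUTF8 peeks at the first bytes of the response body, lets
// charset.DetermineEncoding combine that preview with the Content-Type
// header, and returns the whole body transcoded to UTF-8 together with the
// detected charset name. (Hypothetical helper, not from the examples above.)
func readBodyAsUTF8(resp *http.Response) ([]byte, string, error) {
	br := bufio.NewReader(resp.Body)

	// Peek does not consume bytes, so the full body is still readable below.
	preview, err := br.Peek(1024)
	if err != nil && err != io.EOF {
		return nil, "", err
	}

	// DetermineEncoding never returns a nil Encoding; it falls back to
	// windows-1252 when the charset cannot be determined.
	e, name, _ := charset.DetermineEncoding(preview, resp.Header.Get("Content-Type"))

	utf8Body, err := ioutil.ReadAll(transform.NewReader(br, e.NewDecoder()))
	return utf8Body, name, err
}

func main() {
	resp, err := http.Get("https://example.com/") // placeholder URL
	if err != nil {
		log.Fatal(err)
	}
	defer resp.Body.Close()

	body, name, err := readBodyAsUTF8(resp)
	if err != nil {
		log.Fatal(err)
	}
	fmt.Printf("detected charset %q, got %d bytes of UTF-8\n", name, len(body))
}

For HTML bodies, the same package also provides charset.NewReader, which bundles this peek-and-decode step into a single call.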