func (h ProxyHandler) ServeHTTP(w http.ResponseWriter, r *http.Request) { activeConnections.Add(1) defer activeConnections.Done() conf := GetConfig() if !conf.ACLsLoaded { http.Error(w, "Redwood proxy configuration needs to be updated for this version of Redwood.\n(Use ACLs)", 500) return } if len(r.URL.String()) > 10000 { http.Error(w, "URL too long", http.StatusRequestURITooLong) return } client := r.RemoteAddr host, _, err := net.SplitHostPort(client) if err == nil { client = host } if conf.AuthCacheTime > 0 { auth := r.Header.Get("Proxy-Authorization") if auth == "" { authCacheLock.RLock() ar, ok := authCache[client] authCacheLock.RUnlock() if ok && time.Now().Sub(ar.Time) < time.Duration(conf.AuthCacheTime)*time.Second { r.Header.Set("Proxy-Authorization", ar.ProxyAuthorization) } } else { authCacheLock.Lock() authCache[client] = authRecord{ ProxyAuthorization: auth, Time: time.Now(), } authCacheLock.Unlock() } } if r.Header.Get("Proxy-Authorization") != "" { user, pass := ProxyCredentials(r) if !conf.ValidCredentials(user, pass) { log.Printf("Incorrect username or password from %v: %q:%q", r.RemoteAddr, user, pass) r.Header.Del("Proxy-Authorization") } } // Reconstruct the URL if it is incomplete (i.e. on a transparent proxy). if r.URL.Host == "" { r.URL.Host = r.Host } if r.URL.Scheme == "" { if h.TLS { r.URL.Scheme = "https" } else { r.URL.Scheme = "http" } } var userAgent string if conf.LogUserAgent { userAgent = r.Header.Get("User-Agent") } if realHost, ok := conf.VirtualHosts[r.Host]; ok { r.Host = realHost r.URL.Host = realHost } user := client var authUser string if h.user != "" { authUser = h.user } else if u, _ := ProxyCredentials(r); u != "" { authUser = u } if authUser != "" { user = authUser } tally := conf.URLRules.MatchingRules(r.URL) scores := conf.categoryScores(tally) categories := conf.significantCategories(scores) reqACLs := conf.ACLs.requestACLs(r, authUser) possibleActions := []string{ "allow", "block", "block-invisible", } if r.Header.Get("Proxy-Authorization") == "" && !h.TLS { possibleActions = append(possibleActions, "require-auth") } if r.Method == "CONNECT" && conf.TLSReady { possibleActions = append(possibleActions, "ssl-bump") } thisRule, ignored := conf.ChooseACLCategoryAction(reqACLs, categories, possibleActions...) if r.Method == "CONNECT" && conf.TLSReady && thisRule.Action == "" { // If the result is unclear, go ahead and start to bump the connection. // The ACLs will be checked one more time anyway. thisRule.Action = "ssl-bump" } switch thisRule.Action { case "require-auth": conf.send407(w) log.Printf("Missing required proxy authentication from %v to %v", r.RemoteAddr, r.URL) return case "block": conf.showBlockPage(w, r, user, tally, scores, thisRule) logAccess(r, nil, 0, false, user, tally, scores, thisRule, "", ignored, userAgent) return case "block-invisible": showInvisibleBlock(w) logAccess(r, nil, 0, false, user, tally, scores, thisRule, "", ignored, userAgent) return case "ssl-bump": conn, err := newHijackedConn(w) if err != nil { fmt.Fprintln(conn, "HTTP/1.1 500 Internal Server Error") fmt.Fprintln(conn) fmt.Fprintln(conn, err) conn.Close() return } fmt.Fprint(conn, "HTTP/1.1 200 Connection Established\r\n\r\n") SSLBump(conn, r.URL.Host, user, authUser) return } if r.Host == localServer { conf.ServeMux.ServeHTTP(w, r) return } if r.Method == "CONNECT" { conn, err := newHijackedConn(w) if err != nil { fmt.Fprintln(conn, "HTTP/1.1 500 Internal Server Error") fmt.Fprintln(conn) fmt.Fprintln(conn, err) conn.Close() return } fmt.Fprint(conn, "HTTP/1.1 200 Connection Established\r\n\r\n") logAccess(r, nil, 0, false, user, tally, scores, thisRule, "", ignored, userAgent) connectDirect(conn, r.URL.Host, nil) return } if r.Header.Get("Upgrade") == "websocket" { h.makeWebsocketConnection(w, r) return } r.Header.Add("Via", r.Proto+" Redwood") r.Header.Add("X-Forwarded-For", client) gzipOK := !conf.DisableGZIP && strings.Contains(r.Header.Get("Accept-Encoding"), "gzip") && !lanAddress(client) r.Header.Del("Accept-Encoding") urlChanged := conf.changeQuery(r.URL) if !urlChanged { // Rebuild the URL in a way that will preserve which characters are escaped // and which aren't, for compatibility with broken servers. rawURL := r.RequestURI if strings.HasPrefix(rawURL, r.URL.Scheme) { rawURL = rawURL[len(r.URL.Scheme):] rawURL = strings.TrimPrefix(rawURL, "://") slash := strings.Index(rawURL, "/") if slash == -1 { rawURL = "/" } else { rawURL = rawURL[slash:] } } q := strings.Index(rawURL, "?") if q != -1 { rawURL = rawURL[:q] } if strings.HasPrefix(rawURL, "//") { // The path should start with a single slash not two. rawURL = rawURL[1:] } r.URL.Opaque = rawURL } proxied := false var rt http.RoundTripper if h.rt == nil { if r.URL.Opaque != "" && transport.Proxy != nil { if p, _ := transport.Proxy(r); p != nil { // If the request is going through a proxy, the host needs to be // included in the opaque element. r.URL.Opaque = "//" + r.URL.Host + r.URL.Opaque proxied = true } } rt = &transport } else { rt = h.rt } if !proxied { r.Header.Del("Proxy-Authorization") } resp, err := rt.RoundTrip(r) r.URL.Opaque = "" if err != nil { http.Error(w, err.Error(), http.StatusServiceUnavailable) log.Printf("error fetching %s: %s", r.URL, err) logAccess(r, nil, 0, false, user, tally, scores, thisRule, "", ignored, userAgent) return } defer resp.Body.Close() // Prevent switching to QUIC. resp.Header.Del("Alternate-Protocol") originalContentType := resp.Header.Get("Content-Type") fixContentType(resp) respACLs := conf.ACLs.responseACLs(resp) acls := unionACLSets(reqACLs, respACLs) thisRule, ignored = conf.ChooseACLCategoryAction(acls, categories, "allow", "block", "block-invisible", "hash-image", "phrase-scan") if thisRule.Action == "" { thisRule.Action = "allow" } switch thisRule.Action { case "allow": resp.Header.Set("Content-Type", originalContentType) copyResponseHeader(w, resp) n, err := io.Copy(w, resp.Body) if err != nil { log.Printf("error while copying response (URL: %s): %s", r.URL, err) } logAccess(r, resp, int(n), false, user, tally, scores, thisRule, "", ignored, userAgent) return case "block": conf.showBlockPage(w, r, user, tally, scores, thisRule) logAccess(r, resp, 0, false, user, tally, scores, thisRule, "", ignored, userAgent) return case "block-invisible": showInvisibleBlock(w) logAccess(r, resp, 0, false, user, tally, scores, thisRule, "", ignored, userAgent) return } lr := &io.LimitedReader{ R: resp.Body, N: 1e6, } content, err := ioutil.ReadAll(lr) if err != nil { log.Printf("error while reading response body (URL: %s): %s", r.URL, err) } if lr.N == 0 { log.Println("response body too long to filter:", r.URL) resp.Header.Set("Content-Type", originalContentType) var dest io.Writer = w if gzipOK { resp.Header.Set("Content-Encoding", "gzip") resp.Header.Del("Content-Length") gzw := gzip.NewWriter(w) defer gzw.Close() dest = gzw } copyResponseHeader(w, resp) dest.Write(content) n, err := io.Copy(dest, resp.Body) if err != nil { log.Printf("error while copying response (URL: %s): %s", r.URL, err) } logAccess(r, resp, int(n)+len(content), false, user, tally, scores, ACLActionRule{Action: "allow", Needed: []string{"too-long-to-filter"}}, "", ignored, userAgent) return } modified := false pageTitle := "" switch thisRule.Action { case "phrase-scan": contentType := resp.Header.Get("Content-Type") _, cs, _ := charset.DetermineEncoding(content, contentType) if strings.Contains(contentType, "html") { var doc *html.Node if conf.LogTitle { doc, err = parseHTML(content, cs) if err != nil { log.Printf("Error parsing HTML from %s: %s", r.URL, err) } else { t := titleSelector.MatchFirst(doc) if t != nil { if titleText := t.FirstChild; titleText != nil && titleText.Type == html.TextNode { pageTitle = titleText.Data } } } } modified = conf.pruneContent(r.URL, &content, cs, acls, doc) if modified { resp.Header.Set("Content-Type", "text/html; charset=utf-8") cs = "utf-8" resp.Header.Del("Content-Length") } } conf.scanContent(content, contentType, cs, tally) case "hash-image": img, _, err := image.Decode(bytes.NewReader(content)) if err != nil { log.Printf("Error decoding image from %v: %v", r.URL, err) break } hash := dhash.New(img) for _, h := range conf.ImageHashes { if dhash.Distance(hash, h) <= conf.DhashThreshold { tally[rule{imageHash, h.String()}]++ } } } scores = conf.categoryScores(tally) categories = conf.significantCategories(scores) thisRule, ignored = conf.ChooseACLCategoryAction(acls, categories, "allow", "block", "block-invisible") if thisRule.Action == "" { thisRule.Action = "allow" } switch thisRule.Action { case "block": conf.showBlockPage(w, r, user, tally, scores, thisRule) logAccess(r, resp, len(content), modified, user, tally, scores, thisRule, pageTitle, ignored, userAgent) return case "block-invisible": showInvisibleBlock(w) logAccess(r, resp, len(content), modified, user, tally, scores, thisRule, pageTitle, ignored, userAgent) return } if !modified { resp.Header.Set("Content-Type", originalContentType) } if gzipOK && len(content) > 1000 { resp.Header.Set("Content-Encoding", "gzip") resp.Header.Del("Content-Length") copyResponseHeader(w, resp) gzw := gzip.NewWriter(w) gzw.Write(content) gzw.Close() } else { copyResponseHeader(w, resp) w.Write(content) } logAccess(r, resp, len(content), modified, user, tally, scores, thisRule, pageTitle, ignored, userAgent) }
// runURLTest prints debugging information about how the URL and its content would be rated. func runURLTest(u string) { conf := getConfig() URL, err := url.Parse(u) if err != nil { fmt.Println("Could not parse the URL.") return } if URL.Scheme == "" { url2, err := url.Parse("http://" + u) if err == nil { URL = url2 } } fmt.Println("URL:", URL) fmt.Println() tally := conf.URLRules.MatchingRules(URL) scores := conf.categoryScores(tally) categories := conf.significantCategories(scores) if len(tally) == 0 { fmt.Println("No URL rules match.") } else { fmt.Println("The following URL rules match:") for s, _ := range tally { fmt.Println(s) } } if len(scores) > 0 { fmt.Println() fmt.Println("The request has the following category scores:") printSortedTally(scores) } req := &http.Request{ Method: "GET", URL: URL, Header: make(http.Header), } reqACLs := conf.ACLs.requestACLs(req, "") if len(reqACLs) > 0 { fmt.Println() fmt.Println("The request matches the following ACLs:") for acl := range reqACLs { fmt.Println(acl) } } thisRule, ignored := conf.ChooseACLCategoryAction(reqACLs, categories, "allow", "block", "block-invisible") fmt.Println() if thisRule.Action == "" { fmt.Println("No ACL rule was triggered.") } else { fmt.Println("Triggered rule:", thisRule.Action, thisRule.Conditions()) if len(ignored) > 0 { fmt.Println("Ignored categories:", strings.Join(ignored, ", ")) } } if conf.changeQuery(URL) { fmt.Println() fmt.Println("URL modified to:", URL) } fmt.Println() fmt.Println("Downloading content...") resp, err := http.DefaultTransport.RoundTrip(req) if err != nil { fmt.Println(err) return } defer resp.Body.Close() fmt.Println(resp.Status) fmt.Println() fixContentType(resp) respACLs := conf.ACLs.responseACLs(resp) acls := unionACLSets(reqACLs, respACLs) if len(respACLs) > 0 { fmt.Println("The response matches the following ACLs:") for acl := range respACLs { fmt.Println(acl) } fmt.Println() } thisRule, ignored = conf.ChooseACLCategoryAction(acls, categories, "allow", "block", "block-invisible", "hash-image", "phrase-scan") if thisRule.Action == "" { fmt.Println("No ACL rule was triggered.") } else { fmt.Println("Triggered rule:", thisRule.Action, thisRule.Conditions()) if len(ignored) > 0 { fmt.Println("Ignored categories:", strings.Join(ignored, ", ")) } } if thisRule.Action != "phrase-scan" && thisRule.Action != "hash-image" { return } fmt.Println() contentType := resp.Header.Get("Content-Type") content, err := ioutil.ReadAll(resp.Body) if err != nil { fmt.Println("Error while reading response body:", err) return } var doc *html.Node switch thisRule.Action { case "phrase-scan": modified := false _, cs, _ := charset.DetermineEncoding(content, resp.Header.Get("Content-Type")) if strings.Contains(contentType, "html") { modified = conf.pruneContent(URL, &content, cs, acls, &doc) } if modified { cs = "utf-8" fmt.Println("Performed content pruning.") fmt.Println() } conf.scanContent(content, contentType, cs, tally) if len(tally) == 0 { fmt.Println("No content phrases match.") } else { fmt.Println("The following rules match:") printSortedTally(stringTally(tally)) } case "hash-image": img, _, err := image.Decode(bytes.NewReader(content)) if err != nil { fmt.Printf("Error decoding image: %v\n", err) return } hash := dhash.New(img) fmt.Println("The image's hash is", hash) for _, h := range conf.ImageHashes { distance := dhash.Distance(hash, h.Hash) if distance <= h.Threshold || h.Threshold == -1 && distance <= conf.DhashThreshold { tally[rule{imageHash, h.String()}]++ fmt.Printf("Matching image hash found: %v (%d bits difference)\n", h, distance) } } } scores = conf.categoryScores(tally) categories = conf.significantCategories(scores) if len(scores) > 0 { fmt.Println() fmt.Println("The response has the following category scores:") printSortedTally(scores) } fmt.Println() thisRule, ignored = conf.ChooseACLCategoryAction(acls, categories, "allow", "block", "block-invisible") if thisRule.Action == "" { fmt.Println("No ACL rule was triggered.") } else { fmt.Println("Triggered rule:", thisRule.Action, thisRule.Conditions()) if len(ignored) > 0 { fmt.Println("Ignored categories:", strings.Join(ignored, ", ")) } } }