// Cache downloads the content at u.Url into the local URL cache directory
// and records the cache metadata back into the urls collection.
func Cache(u *urls.Url) error {
	for _, s := range badUrlStrings {
		if strings.Contains(u.Url, s) {
			return fmt.Errorf("Url contains bad substring '%s'.", s)
		}
	}
	// Try a HEAD request first to check the Content-Length header
	// before committing to fetching the whole body.
	res, err := http.Head(u.Url)
	if err != nil {
		return err
	}
	if res.StatusCode != 200 {
		return fmt.Errorf("Received non-200 response '%s' from server.", res.Status)
	}
	if size := res.Header.Get("Content-Length"); size != "" {
		if bytes, err := strconv.Atoi(size); err != nil {
			return fmt.Errorf("Received unparseable content length '%s' "+
				"from server: %v.", size, err)
		} else if bytes > 1<<22 {
			return fmt.Errorf("Response too large (%d MB) to cache safely.",
				bytes/1024/1024)
		}
	}
	res, err = http.Get(u.Url)
	if err != nil {
		return err
	}
	defer res.Body.Close()
	// 1 << 22 == 4MB
	if res.ContentLength > 1<<22 {
		return fmt.Errorf("Response too large (%d MB) to cache safely.",
			res.ContentLength/1024/1024)
	}
	u.CachedAs = Encode(u.Url)
	fh, err := os.OpenFile(util.JoinPath(*urlCacheDir, u.CachedAs),
		os.O_WRONLY|os.O_CREATE|os.O_TRUNC, os.FileMode(0600))
	if err != nil {
		return err
	}
	defer fh.Close()
	if _, err := io.Copy(fh, res.Body); err != nil {
		return err
	}
	u.CacheTime = time.Now()
	u.MimeType = res.Header.Get("Content-Type")
	if _, err := uc.UpsertId(u.Id, u); err != nil {
		return err
	}
	return nil
}
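// The snippet below is an illustrative sketch only, not part of the driver:
// it shows how a caller might invoke Cache and use the resulting metadata.
// It assumes a urls.Url can be constructed directly with its Url and Id
// fields set, and that logging.Error/logging.Info exist alongside the
// logging.Fatal call used in Init; both are assumptions, not confirmed API.
//
//	u := &urls.Url{Url: "http://example.com/image.png", Id: bson.NewObjectId()}
//	if err := Cache(u); err != nil {
//		logging.Error("Couldn't cache URL: %v", err)
//	} else {
//		// The cached copy would be served under cachePath + u.CachedAs,
//		// with the Content-Type taken from u.MimeType.
//		logging.Info("Cached %s as %s (%s)", u.Url, u.CachedAs, u.MimeType)
//	}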
"github.com/fluffle/sp0rkle/bot" "github.com/fluffle/sp0rkle/collections/urls" "github.com/fluffle/sp0rkle/util" "gopkg.in/mgo.v2/bson" ) const shortenPath string = "/s/" const cachePath string = "/c/" const autoShortenLimit int = 120 var badUrlStrings = []string{ "4chan", } var urlCacheDir *string = flag.String("url_cache_dir", util.JoinPath(os.Getenv("HOME"), ".sp0rkle"), "Path to store cached content under.") var uc *urls.Collection // Remember the last url seen on a per-channel basis var lastseen = map[string]bson.ObjectId{} func Init() { uc = urls.Init() if err := os.MkdirAll(*urlCacheDir, 0700); err != nil { logging.Fatal("Couldn't create URL cache dir: %v", err) } bot.Handle(urlScan, client.PRIVMSG)