func (ud *urlDriver) Cache(u *urls.Url) error {
	// Refuse to cache URLs containing blacklisted substrings.
	for _, s := range badUrlStrings {
		if strings.Index(u.Url, s) != -1 {
			return fmt.Errorf("Url contains bad substring '%s'.", s)
		}
	}
	res, err := http.Get(u.Url)
	if err != nil {
		return err
	}
	defer res.Body.Close()
	if res.StatusCode != 200 {
		return fmt.Errorf("Received non-200 response '%s' from server.", res.Status)
	}
	// 1 << 22 == 4MB
	if res.ContentLength > 1<<22 {
		return fmt.Errorf("Response too large (%d MB) to cache safely.",
			res.ContentLength/1024/1024)
	}
	// Write the response body out to a file in the cache directory.
	u.CachedAs = ud.Encode(u.Url)
	fh, err := os.OpenFile(util.JoinPath(*urlCacheDir, u.CachedAs),
		os.O_WRONLY|os.O_CREATE|os.O_TRUNC, os.FileMode(0600))
	if err != nil {
		return err
	}
	defer fh.Close()
	if _, err := io.Copy(fh, res.Body); err != nil {
		return err
	}
	// Record the cache metadata and upsert the url in the collection.
	u.CacheTime = time.Now()
	u.MimeType = res.Header.Get("Content-Type")
	if _, err := ud.Upsert(bson.M{"url": u.Url}, u); err != nil {
		return err
	}
	return nil
}
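// Illustrative usage (a sketch, not part of the original driver): a caller
// holding an initialised *urlDriver `ud` and a scanned *urls.Url `u` would
// typically just call Cache and handle the error; the url document is only
// upserted once the body has been fetched and written out successfully:
//
//	if err := ud.Cache(u); err != nil {
//		// log and move on; the url document was not upserted
//	}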
"os" "strings" "time" ) const driverName string = "urls" const shortenPath string = "/s/" const cachePath string = "/c/" const autoShortenLimit int = 120 var badUrlStrings = []string{ "4chan", } var urlCacheDir *string = flag.String("url_cache_dir", util.JoinPath(os.Getenv("HOME"), ".sp0rkle"), "Path to store cached content under.") type urlDriver struct { *urls.UrlCollection // Remember the last url seen on a per-channel basis lastseen map[string]bson.ObjectId l logging.Logger } func UrlDriver(db *db.Database, l logging.Logger) *urlDriver { return &urlDriver{ UrlCollection: urls.Collection(db, l), lastseen: make(map[string]bson.ObjectId), l: l, }