func handleImageDetails(ctx context.Context, w http.ResponseWriter, r *http.Request) {
	db := sq.DB(ctx)
	img, err := ImageByID(db, web.Args(ctx).ByIndex(0))
	switch err {
	case nil:
		// all good
	case sq.ErrNotFound:
		web.StdHTMLResp(w, http.StatusNotFound)
		return
	default:
		log.Error("cannot get image",
			"image", web.Args(ctx).ByIndex(0),
			"error", err.Error())
		web.StdJSONResp(w, http.StatusInternalServerError)
		return
	}

	img.Tags, err = ImageTags(db, img.ImageID)
	if err != nil {
		log.Error("cannot get image tags",
			"image", web.Args(ctx).ByIndex(0),
			"error", err.Error())
		web.StdJSONResp(w, http.StatusInternalServerError)
		return
	}

	web.JSONResp(w, img, http.StatusOK)
}
func fetch(urls []string) Entries {
	var result Entries
	p := gofeed.NewParser()
	for _, url := range urls {
		resp, err := http.Get(url)
		if err != nil {
			log.Error("cannot fetch feed", "url", url, "error", err.Error())
			continue
		}
		feed, err := p.Parse(resp.Body)
		resp.Body.Close()
		if err != nil {
			log.Error("cannot parse feed", "url", url, "error", err.Error())
			continue
		}
		for _, it := range feed.Items {
			result = append(result, &Entry{
				Feed: Feed{
					Title: html.UnescapeString(feed.Title),
					Link:  feed.Link,
				},
				Title:     html.UnescapeString(it.Title),
				Link:      it.Link,
				Published: parseTime(it.Published),
			})
		}
	}
	return result
}
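fetch relies on a parseTime helper that is not included in the listing. A minimal sketch of what such a helper might look like, assuming it simply tries the timestamp layouts most commonly seen in RSS and Atom feeds and falls back to the zero time when none match:

// parseTime is an assumed helper that turns a feed's published string into
// a time.Time; the real implementation may accept a different set of layouts.
func parseTime(s string) time.Time {
	layouts := []string{
		time.RFC1123Z,
		time.RFC1123,
		time.RFC3339,
		"2006-01-02T15:04:05Z",
	}
	for _, layout := range layouts {
		if t, err := time.Parse(layout, s); err == nil {
			return t
		}
	}
	return time.Time{}
}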
func handleCreateTopic(ctx context.Context, w http.ResponseWriter, r *http.Request) {
	account, ok := auth.AuthRequired(pg.DB(ctx), w, r)
	if !ok {
		return
	}

	input := struct {
		Title   string
		Tags    []string
		Content string
	}{
		Title:   r.FormValue("title"),
		Tags:    strings.Fields(r.FormValue("tags")),
		Content: r.FormValue("content"),
	}

	var errs []string
	if r.Method == "POST" {
		if input.Title == "" {
			errs = append(errs, `"title" is required`)
		}
		if input.Content == "" {
			errs = append(errs, `"content" is required`)
		}
	}
	if r.Method == "GET" || len(errs) != 0 {
		render(w, "topic_create.tmpl", input)
		return
	}

	db := pg.DB(ctx)
	tx, err := db.Beginx()
	if err != nil {
		log.Error("cannot start transaction", "error", err.Error())
		respond500(w, r)
		return
	}
	defer tx.Rollback()

	topic := Topic{
		AuthorID: int64(account.AccountID),
		Title:    input.Title,
		Tags:     input.Tags,
	}
	t, _, err := CreateTopicWithComment(tx, topic, input.Content)
	if err != nil {
		log.Error("cannot create topic with comment", "error", err.Error())
		respond500(w, r)
		return
	}
	if err := tx.Commit(); err != nil {
		log.Error("cannot commit transaction", "error", err.Error())
		respond500(w, r)
		return
	}

	http.Redirect(w, r, fmt.Sprintf("/t/%d", t.TopicID), http.StatusSeeOther)
}
func main() {
	go func() {
		// the channel is buffered so that a signal delivered while the loop
		// is busy updating is not lost (signal.Notify never blocks on send)
		sigc := make(chan os.Signal, 1)
		signal.Notify(sigc, syscall.SIGUSR1, syscall.SIGHUP)
		defer signal.Stop(sigc)

		log.Debug("updating feeds", "trigger", "init")
		feedreader.Update()

		for {
			select {
			case now := <-time.After(30 * time.Minute):
				log.Debug("updating feeds", "trigger", "clock", "time", now.String())
			case sig := <-sigc:
				log.Debug("updating feeds", "trigger", "signal", "signal", sig.String())
			}
			feedreader.Update()
		}
	}()

	rt := web.NewRouter(web.Routes{
		{"GET", `/`, feedreader.HandleListEntries},
		{"GET", `/sources`, feedreader.HandleListSources},
		{web.AnyMethod, `.*`, feedreader.Handle404},
	})
	if err := http.ListenAndServe(":8000", rt); err != nil {
		log.Error("HTTP server failed", "error", err.Error())
	}
}
func handleListBookmarks(ctx context.Context, w http.ResponseWriter, r *http.Request) {
	offset, _ := strconv.ParseInt(r.URL.Query().Get("offset"), 10, 64)
	if offset < 0 {
		offset = 0
	}

	bookmarks := make([]*Bookmark, 0, 100)
	err := pg.DB(ctx).Select(&bookmarks, `
		SELECT b.*
		FROM bookmarks b
		ORDER BY created DESC
		LIMIT $1
		OFFSET $2
	`, 500, offset)
	if err != nil {
		log.Error("cannot select bookmarks", "error", err.Error())
		web.StdJSONResp(w, http.StatusInternalServerError)
		return
	}

	resp := struct {
		Bookmarks []*Bookmark `json:"bookmarks"`
	}{
		Bookmarks: bookmarks,
	}
	web.JSONResp(w, resp, http.StatusOK)
}
func words(r io.Reader, stopw map[string]struct{}) map[string]int {
	counts := make(map[string]int)
	scanner := bufio.NewScanner(r)
	scanner.Split(bufio.ScanWords)
	for scanner.Scan() {
		w := strings.ToLower(scanner.Text())
		if strings.HasPrefix(w, "<") || strings.HasSuffix(w, ">") {
			continue
		}
		w = strings.TrimRight(w, ",.")
		if len(w) > 40 {
			continue
		}
		if _, ok := stopw[w]; ok {
			continue
		}
		counts[w]++
	}
	if err := scanner.Err(); err != nil {
		log.Error("scanner error", "error", err.Error())
	}
	return counts
}
func handleAddBookmark(ctx context.Context, w http.ResponseWriter, r *http.Request) {
	var input struct {
		Url string `json:"url"`
	}
	if err := json.NewDecoder(r.Body).Decode(&input); err != nil {
		web.JSONErr(w, err.Error(), http.StatusBadRequest)
		return
	}

	resp, err := ctxhttp.Get(ctx, &crawler, input.Url)
	if err != nil {
		log.Error("cannot crawl", "url", input.Url, "error", err.Error())
		web.StdJSONResp(w, http.StatusInternalServerError)
		return
	}
	defer resp.Body.Close()

	body := make([]byte, 1024*20)
	if n, err := resp.Body.Read(body); err != nil && err != io.EOF {
		log.Error("cannot read crawler response", "url", input.Url, "error", err.Error())
		web.StdJSONResp(w, http.StatusInternalServerError)
		return
	} else {
		body = body[:n]
	}
	title := pageTitle(body)

	var b Bookmark
	err = pg.DB(ctx).Get(&b, `
		INSERT INTO bookmarks (title, url, created)
		VALUES ($1, $2, $3)
		ON CONFLICT DO NOTHING
		RETURNING *
	`, title, input.Url, time.Now())
	if err != nil {
		log.Error("cannot create bookmark", "error", err.Error())
		web.StdJSONResp(w, http.StatusInternalServerError)
		return
	}
	web.JSONResp(w, b, http.StatusCreated)
}
func imageMeta(r io.ReadSeeker) (*Image, error) {
	conf, err := jpeg.DecodeConfig(r)
	if err != nil {
		return nil, fmt.Errorf("cannot decode JPEG: %s", err)
	}

	// compute image hash from image content
	oid := sha256.New()
	if _, err := io.Copy(oid, r); err != nil {
		return nil, fmt.Errorf("cannot compute SHA: %s", err)
	}

	img := Image{
		ImageID: encode(oid),
		Width:   conf.Width,
		Height:  conf.Height,
	}

	if _, err := r.Seek(0, os.SEEK_SET); err != nil {
		return nil, fmt.Errorf("cannot seek: %s", err)
	}
	if meta, err := exif.Decode(r); err != nil {
		log.Error("cannot extract EXIF metadata", "error", err.Error())
	} else {
		if orientation, err := meta.Get(exif.Orientation); err != nil {
			log.Debug("cannot extract image orientation", "decoder", "EXIF", "error", err.Error())
		} else {
			if o, err := orientation.Int(0); err != nil {
				log.Debug("cannot format orientation", "decoder", "EXIF", "error", err.Error())
			} else {
				img.Orientation = o
			}
		}
		if dt, err := meta.Get(exif.DateTimeOriginal); err != nil {
			log.Debug("cannot extract image datetime original", "decoder", "EXIF", "error", err.Error())
		} else {
			if raw, err := dt.StringVal(); err != nil {
				log.Debug("cannot format datetime original", "decoder", "EXIF", "error", err.Error())
			} else {
				img.Created, err = time.Parse("2006:01:02 15:04:05", raw)
				if err != nil {
					log.Debug("cannot parse datetime original", "decoder", "EXIF", "value", raw, "error", err.Error())
				}
			}
		}
	}
	return &img, nil
}
func main() {
	conf := struct {
		HTTP     string
		Postgres string
	}{
		HTTP:     "localhost:8000",
		Postgres: "host=localhost port=5432 user=postgres dbname=bb sslmode=disable",
	}
	envconf.Must(envconf.LoadEnv(&conf))

	ctx := context.Background()

	ctx = auth.WithOAuth(ctx, map[string]*oauth2.Config{
		"google": &oauth2.Config{
			ClientID:     "352914691292-2h70272sb408r3vibe4jm4egote804ka.apps.googleusercontent.com",
			ClientSecret: "L_bgOHLCgNYL-3KG8a5u99mF",
			RedirectURL:  "http://bb.example.com:8000/login/success",
			Scopes: []string{
				"https://www.googleapis.com/auth/userinfo.profile",
				"https://www.googleapis.com/auth/userinfo.email",
			},
			Endpoint: oauth2google.Endpoint,
		},
	})

	ctx = cache.WithLocalCache(ctx, 1000)

	db, err := sql.Open("postgres", conf.Postgres)
	if err != nil {
		log.Fatal("cannot open database", "error", err.Error())
	}
	defer db.Close()
	ctx = pg.WithDB(ctx, db)
	go func() {
		if err := db.Ping(); err != nil {
			log.Error("cannot ping database", "error", err.Error())
		}
	}()

	app := bb.NewApp(ctx)
	log.Debug("running HTTP server", "address", conf.HTTP)
	if err := http.ListenAndServe(conf.HTTP, app); err != nil {
		log.Error("HTTP server error", "error", err.Error())
	}
}
func render(w io.Writer, templateName string, context interface{}) {
	// XXX cache
	t, err := parseFiles(baseTmplPath, "bb/templates/"+templateName)
	if err != nil {
		log.Error("cannot parse template", "template", templateName, "error", err.Error())
		return
	}
	t = t.Funcs(baseFuncs)
	if err := t.Execute(w, context); err != nil {
		log.Error("cannot render template", "template", templateName, "error", err.Error())
		return
	}
}
func handleTopicDetails(ctx context.Context, w http.ResponseWriter, r *http.Request) {
	db := pg.DB(ctx)

	tid, _ := strconv.ParseInt(web.Args(ctx).ByIndex(0), 10, 64)
	topic, err := TopicByID(db, tid)
	switch err {
	case nil:
		// all good
	case pg.ErrNotFound:
		respond404(w, r)
		return
	default:
		log.Error("cannot get topic by ID",
			"topic", web.Args(ctx).ByIndex(0),
			"error", err.Error())
		respond500(w, r)
		return
	}

	page, _ := strconv.ParseInt(r.URL.Query().Get("page"), 10, 64)
	// guard against a missing or bogus "page" parameter turning into a
	// negative OFFSET in the query below
	if page < 1 {
		page = 1
	}
	comments, err := Comments(db, CommentsOpts{
		Offset:  (page - 1) * 200,
		Limit:   200,
		TopicID: topic.TopicID,
	})
	if err != nil {
		log.Error("cannot get comments for topic",
			"topic", fmt.Sprint(topic.TopicID),
			"error", err.Error())
		respond500(w, r)
		return
	}

	context := struct {
		Topic    *Topic
		Comments []*Comment
	}{
		Topic:    topic,
		Comments: comments,
	}
	render(w, "topic_details.tmpl", context)
}
func main() {
	conf := struct {
		HTTP     string
		Postgres string
		Schema   string
	}{
		HTTP:     "localhost:8000",
		Postgres: "dbname=postgres user=postgres sslmode=disable",
	}
	if err := envconf.LoadEnv(&conf); err != nil {
		log.Fatal("cannot load configuration", "error", err.Error())
	}

	ctx, done := context.WithCancel(context.Background())
	defer done()

	db, err := sql.Open("postgres", conf.Postgres)
	if err != nil {
		log.Fatal("cannot connect to database", "error", err.Error())
	}
	defer db.Close()
	if err := db.Ping(); err != nil {
		log.Error("cannot ping database", "postgres", conf.Postgres, "error", err.Error())
	}
	ctx = pg.WithDB(ctx, db)

	if conf.Schema != "" {
		if err := pg.LoadSchema(db, conf.Schema); err != nil {
			log.Error("cannot load schema", "schema", conf.Schema, "error", err.Error())
		}
	}

	app := NewApplication(ctx)
	if err := http.ListenAndServe(conf.HTTP, app); err != nil {
		log.Error("HTTP server error", "error", err.Error())
	}
}
func articleHandler(rp *redis.Pool) http.HandlerFunc {
	return func(w http.ResponseWriter, r *http.Request) {
		type Article struct {
			Key   string `redis:"-"`
			Url   string `redis:"url"`
			Title string `redis:"title"`
		}

		rc := rp.Get()
		defer rc.Close()

		var articles []*Article
		// the loop variable is named "word" so that it does not shadow the
		// http.ResponseWriter
		for _, word := range r.URL.Query()["word"] {
			keys, err := redis.Strings(rc.Do("ZREVRANGE", "word:"+word, 0, 100))
			if err != nil {
				log.Error("cannot get keys", "error", err.Error())
				continue
			}
			for _, key := range keys {
				raw, err := redis.Values(rc.Do("HGETALL", key))
				if err != nil {
					log.Error("cannot get article", "key", key, "error", err.Error())
					continue
				}
				var art Article
				if err := redis.ScanStruct(raw, &art); err != nil {
					log.Error("cannot scan article", "key", key, "error", err.Error())
					continue
				}
				art.Key = key
				articles = append(articles, &art)
			}
		}

		w.Header().Set("Content-Type", "application/json")
		json.NewEncoder(w).Encode(articles)
	}
}
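For completeness, this is one way the handler above could be mounted, assuming a plain net/http mux and a redigo pool pointed at a local Redis; the address, route, and pool settings are illustrative only:

func main() {
	// illustrative pool configuration; the real application may differ
	pool := &redis.Pool{
		MaxIdle: 4,
		Dial: func() (redis.Conn, error) {
			return redis.Dial("tcp", "localhost:6379")
		},
	}
	defer pool.Close()

	http.HandleFunc("/articles", articleHandler(pool))
	if err := http.ListenAndServe(":8000", nil); err != nil {
		log.Error("HTTP server error", "error", err.Error())
	}
}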
func render(w io.Writer, name string, context interface{}) {
	if debugTemplate {
		tmpl = template.Must(template.New("").Funcs(template.FuncMap{
			"favicon":       favicon,
			"hashcolor":     hashcolor,
			"truncatechars": truncatechars,
		}).ParseGlob(getenv("TEMPLATES", "*/*/*.tmpl")))
	}
	if err := tmpl.ExecuteTemplate(w, name, context); err != nil {
		log.Error("cannot render template", "name", name, "error", err.Error())
	}
}
func main() {
	minRepFl := flag.Int("minrep", 2, "Minimum repetition amount for word to be relevant")
	minWLenFl := flag.Int("minwlen", 3, "Minimum word length")
	stopwFl := flag.String("stopw", "", "Stopwords list")
	flag.Parse()

	stopw := make(map[string]struct{})
	if *stopwFl != "" {
		stopw = stopwords(*stopwFl)
	}

	counts := make(map[string]int)
	scanner := bufio.NewScanner(os.Stdin)
	scanner.Split(bufio.ScanWords)
	for scanner.Scan() {
		w := strings.ToLower(scanner.Text())
		if strings.HasPrefix(w, "<") || strings.HasSuffix(w, ">") {
			continue
		}
		w = strings.TrimRight(w, ",.")
		if len(w) > 40 {
			continue
		}
		if len(w) < *minWLenFl {
			continue
		}
		if _, ok := stopw[w]; ok {
			continue
		}
		counts[w]++
	}
	if err := scanner.Err(); err != nil {
		log.Error("scanner error", "error", err.Error())
	}

	var pairs pairs
	for word, count := range counts {
		if count >= *minRepFl {
			pairs = append(pairs, pair{word, count})
		}
	}
	sort.Sort(pairs)
	for _, pair := range pairs {
		fmt.Printf("%s\t%d\n", pair.word, pair.count)
	}
}
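The pair and pairs types used for sorting are not part of the listing. A small sketch that satisfies sort.Interface, assuming the intended order is highest count first:

// pair and pairs are assumed helper types for ranking word counts.
type pair struct {
	word  string
	count int
}

type pairs []pair

func (p pairs) Len() int           { return len(p) }
func (p pairs) Swap(i, j int)      { p[i], p[j] = p[j], p[i] }
func (p pairs) Less(i, j int) bool { return p[i].count > p[j].count }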
func stopwords(path string) map[string]struct{} {
	stopw := make(map[string]struct{})

	fd, err := os.Open(path)
	if err != nil {
		log.Error("cannot open stopwords file", "error", err.Error())
		return stopw
	}
	defer fd.Close()

	rd := bufio.NewReader(fd)
	for {
		word, err := rd.ReadString('\n')
		if err != nil {
			if err == io.EOF {
				// ReadString returns whatever was read before EOF, so keep a
				// final word that is not terminated by a newline
				if w := strings.TrimSpace(word); w != "" {
					stopw[w] = struct{}{}
				}
			} else {
				log.Error("cannot read stopwords", "error", err.Error())
			}
			return stopw
		}
		stopw[strings.TrimSpace(word)] = struct{}{}
	}
}
func Update() {
	urls, err := sources()
	if err != nil {
		log.Error("cannot get sources", "error", err.Error())
		return
	}

	entries := fetch(urls)
	sort.Sort(sort.Reverse(entries))

	feeds.Lock()
	defer feeds.Unlock()
	feeds.entries = entries
}
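Update swaps the shared entry list under a lock, but neither the feeds variable nor the sort implementation on Entries is shown. A plausible sketch, assuming Entries sorts ascending by publication time (so that sort.Reverse above yields newest first) and feeds is a mutex-guarded holder:

// Entries is a sortable list of feed entries ([]*Entry, as built by fetch),
// assumed here to order by publication time, oldest first.
type Entries []*Entry

func (e Entries) Len() int           { return len(e) }
func (e Entries) Swap(i, j int)      { e[i], e[j] = e[j], e[i] }
func (e Entries) Less(i, j int) bool { return e[i].Published.Before(e[j].Published) }

// feeds holds the most recently fetched entries; the mutex is needed because
// Update and the HTTP handlers run concurrently.
var feeds = struct {
	sync.Mutex
	entries Entries
}{}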
func handleListNotes(ctx context.Context, w http.ResponseWriter, r *http.Request) {
	notes, err := ListNotes(ctx)
	if err != nil {
		log.Error("cannot list notes", "error", err.Error())
		http.Error(w, err.Error(), http.StatusInternalServerError)
		return
	}
	resp := struct {
		Notes []*Note `json:"notes"`
	}{
		Notes: notes,
	}
	web.JSONResp(w, resp, http.StatusOK)
}
func keyManager() (*keys.KeyManager, func()) {
	var m keys.KeyManager

	t := time.NewTicker(24 * time.Hour)
	go func() {
		for range t.C {
			if id, err := m.GenerateKey(24 * 7 * time.Hour); err != nil {
				log.Error("cannot generate key", "error", err.Error())
			} else {
				log.Debug("new key generated", "id", id)
			}
		}
	}()

	return &m, t.Stop
}
func handleStorePaste(pid string, w http.ResponseWriter, r *http.Request) {
	b, err := ioutil.ReadAll(io.LimitReader(r.Body, 1024))
	if err != nil {
		log.Error("cannot read body", "error", err.Error())
		w.WriteHeader(http.StatusInternalServerError)
		fmt.Fprintln(w, "internal server error")
		return
	}

	db.mu.Lock()
	defer db.mu.Unlock()
	db.mem[pid] = b

	w.WriteHeader(http.StatusCreated)
	fmt.Fprintln(w, pid)
}
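handleStorePaste writes to a package-level db value whose definition is not shown. Assuming it is nothing more than a mutex-guarded in-memory map keyed by paste ID, it could be as small as:

// db is an assumed in-memory paste store; the real definition may differ
// (for example, it could persist pastes to disk).
var db = struct {
	mu  sync.Mutex
	mem map[string][]byte
}{
	mem: make(map[string][]byte),
}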
func scrapHandler(urlc chan string) http.HandlerFunc {
	return func(w http.ResponseWriter, r *http.Request) {
		b, err := ioutil.ReadAll(r.Body)
		if err != nil {
			log.Error("cannot read body", "error", err.Error())
			http.Error(w, err.Error(), http.StatusInternalServerError)
			return
		}
		// push URLs to the queue in the background so that a full channel
		// does not block the HTTP response
		go func() {
			for _, urlStr := range strings.Fields(string(b)) {
				urlc <- urlStr
			}
		}()
		fmt.Fprintln(w, "ok")
	}
}
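scrapHandler only queues URLs; the consumer side is not part of the listing. A sketch of a worker loop that drains the channel, calls the crawl function shown later in this listing, and requeues the links it discovers; the function name and the drop-when-full policy are assumptions:

// crawlWorker is an assumed consumer of the URL queue.
func crawlWorker(rp *redis.Pool, urlc chan string, stopw map[string]struct{}) {
	for urlStr := range urlc {
		for _, next := range crawl(rp, urlStr, stopw) {
			select {
			case urlc <- next:
			default:
				// the queue is full; drop the link rather than block the worker
			}
		}
	}
}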
func handleListTopics(ctx context.Context, w http.ResponseWriter, r *http.Request) {
	opts := TopicsOpts{
		Limit: 200,
		Tags:  r.URL.Query()["tag"],
	}
	db := pg.DB(ctx)
	topics, err := Topics(db, opts)
	if err != nil {
		log.Error("cannot list topics", "error", err.Error())
		respond500(w, r)
		return
	}

	context := struct {
		Topics []*TopicWithAuthor
	}{
		Topics: topics,
	}
	render(w, "topic_list.tmpl", context)
}
func main() {
	conf := struct {
		HTTP string
	}{
		HTTP: "localhost:8000",
	}
	if err := envconf.LoadEnv(&conf); err != nil {
		log.Fatal("cannot load configuration", "error", err.Error())
	}

	ctx := context.Background()

	km, stop := keyManager()
	defer stop()
	ctx = keys.WithManager(ctx, km)

	app := NewApplication(ctx)
	if err := http.ListenAndServe(conf.HTTP, app); err != nil {
		log.Error("HTTP server error", "error", err.Error())
	}
}
func handleCreateComment(ctx context.Context, w http.ResponseWriter, r *http.Request) {
	input := struct {
		Content string
	}{
		Content: r.FormValue("content"),
	}

	var errs []string
	if len(input.Content) == 0 {
		errs = append(errs, `"content" is required`)
	}
	if len(errs) != 0 {
		w.WriteHeader(http.StatusBadRequest)
		fmt.Fprintf(w, "%v", errs)
		return
	}

	db := pg.DB(ctx)
	tid, _ := strconv.ParseInt(web.Args(ctx).ByIndex(0), 10, 64)
	c, err := CreateComment(db, Comment{
		TopicID:  tid,
		Content:  input.Content,
		AuthorID: 1,
	})
	switch err {
	case nil:
		// ok
	default:
		log.Error("cannot create comment",
			"topic", fmt.Sprint(tid),
			"error", err.Error())
		respond500(w, r)
		return
	}

	http.Redirect(w, r, fmt.Sprintf("/t/%d", c.TopicID), http.StatusSeeOther)
}
func handleListImages(ctx context.Context, w http.ResponseWriter, r *http.Request) {
	offset, _ := strconv.ParseInt(r.URL.Query().Get("offset"), 10, 64)
	opts := ImagesOpts{
		Offset: offset,
		Limit:  200,
	}
	// narrow to images tagged as specified
	for name, values := range r.URL.Query() {
		if !strings.HasPrefix(name, "tag_") {
			continue
		}
		for _, value := range values {
			opts.Tags = append(opts.Tags, KeyValue{
				Key:   name[4:],
				Value: value,
			})
		}
	}

	imgs, err := Images(sq.DB(ctx), opts)
	if err != nil {
		log.Error("cannot list images", "error", err.Error())
		web.StdJSONResp(w, http.StatusInternalServerError)
		return
	}
	if imgs == nil {
		imgs = make([]*Image, 0)
	}
	resp := struct {
		Images []*Image `json:"images"`
	}{
		Images: imgs,
	}
	web.JSONResp(w, resp, http.StatusOK)
}
func handleTagImage(ctx context.Context, w http.ResponseWriter, r *http.Request) {
	var input struct {
		Name  string
		Value string
	}
	if err := json.NewDecoder(r.Body).Decode(&input); err != nil {
		web.JSONErr(w, err.Error(), http.StatusBadRequest)
		return
	}

	var errs []string
	if input.Name == "" {
		errs = append(errs, `"name" is required`)
	}
	if input.Value == "" {
		errs = append(errs, `"value" is required`)
	}
	if len(errs) != 0 {
		web.JSONErrs(w, errs, http.StatusBadRequest)
		return
	}

	db := sq.DB(ctx)

	img, err := ImageByID(db, web.Args(ctx).ByIndex(0))
	switch err {
	case nil:
		// all good
	case sq.ErrNotFound:
		web.JSONErr(w, "parent image does not exist", http.StatusBadRequest)
		return
	default:
		log.Error("database error",
			"image", web.Args(ctx).ByIndex(0),
			"error", err.Error())
		web.StdJSONResp(w, http.StatusInternalServerError)
		return
	}

	tag, err := CreateTag(db, Tag{
		ImageID: img.ImageID,
		Name:    input.Name,
		Value:   input.Value,
	})
	switch err {
	case nil:
		// all good, update storage meta
	case sq.ErrConflict:
		web.JSONResp(w, tag, http.StatusOK)
		return
	default:
		log.Error("cannot create object", "error", err.Error())
		web.StdJSONResp(w, http.StatusInternalServerError)
		return
	}

	if img.Tags, err = ImageTags(db, img.ImageID); err != nil {
		log.Error("cannot get image tags",
			"image", img.ImageID,
			"error", err.Error())
		web.StdJSONResp(w, http.StatusInternalServerError)
		return
	}
	fs := FileStore(ctx)
	if err := fs.PutMeta(img); err != nil {
		log.Error("cannot store image metadata",
			"image", img.ImageID,
			"error", err.Error())
		web.StdJSONResp(w, http.StatusInternalServerError)
		return
	}

	web.JSONResp(w, tag, http.StatusCreated)
}
func handleServeImage(ctx context.Context, w http.ResponseWriter, r *http.Request) {
	img, err := ImageByID(sq.DB(ctx), web.Args(ctx).ByIndex(0))
	switch err {
	case nil:
		// all good
	case sq.ErrNotFound:
		web.StdJSONResp(w, http.StatusNotFound)
		return
	default:
		log.Error("cannot get object",
			"object", web.Args(ctx).ByIndex(0),
			"error", err.Error())
		web.StdJSONResp(w, http.StatusInternalServerError)
		return
	}

	if web.CheckLastModified(w, r, img.Created) {
		return
	}

	fs := FileStore(ctx)
	fd, err := fs.Read(img.Created.Year(), img.ImageID)
	if err != nil {
		log.Error("cannot read image file",
			"image", img.ImageID,
			"error", err.Error())
		web.StdJSONResp(w, http.StatusInternalServerError)
		return
	}
	defer fd.Close()

	w.Header().Set("X-Image-ID", img.ImageID)
	w.Header().Set("X-Image-Width", fmt.Sprint(img.Width))
	w.Header().Set("X-Image-Height", fmt.Sprint(img.Height))
	w.Header().Set("X-Image-Created", img.Created.Format(time.RFC3339))
	w.Header().Set("Content-Type", "image/jpeg")

	if r.URL.Query().Get("resize") == "" {
		io.Copy(w, fd)
		return
	}

	image, err := jpeg.Decode(fd)
	if err != nil {
		log.Error("cannot decode image file",
			"image", img.ImageID,
			"error", err.Error())
		web.StdJSONResp(w, http.StatusInternalServerError)
		return
	}

	var width, height int
	if _, err := fmt.Sscanf(r.URL.Query().Get("resize"), "%dx%d", &width, &height); err != nil {
		log.Error("cannot resize image", "image", img.ImageID, "error", err.Error())
	} else {
		switch img.Orientation {
		case 1:
			// all good
		case 3:
			image = imaging.Rotate180(image)
		case 8:
			image = imaging.Rotate90(image)
		case 6:
			image = imaging.Rotate270(image)
		default:
			log.Debug("unknown image orientation",
				"decoder", "EXIF",
				"image", img.ImageID,
				"value", fmt.Sprint(img.Orientation))
		}
		image = imaging.Fill(image, width, height, imaging.Center, imaging.Linear)
	}
	imaging.Encode(w, image, imaging.JPEG)
}
func handleUploadImage(ctx context.Context, w http.ResponseWriter, r *http.Request) {
	if err := r.ParseMultipartForm(10 * megabyte); err != nil {
		web.JSONResp(w, err.Error(), http.StatusBadRequest)
		return
	}

	var header *multipart.FileHeader
	for _, headers := range r.MultipartForm.File {
		for _, h := range headers {
			log.Debug("uploading file", "name", h.Filename)
			if header != nil {
				web.JSONErr(w, "cannot upload more than one file at once", http.StatusBadRequest)
				return
			}
			header = h
		}
	}
	if header == nil {
		web.JSONErr(w, "image file missing", http.StatusBadRequest)
		return
	}

	if !strings.HasSuffix(strings.ToLower(header.Filename), ".jpg") {
		// XXX this is not the best validation
		web.JSONErr(w, "only JPEG format is allowed", http.StatusBadRequest)
		return
	}

	fd, err := header.Open()
	if err != nil {
		log.Error("cannot open uploaded file", "name", header.Filename, "error", err.Error())
		web.StdJSONResp(w, http.StatusInternalServerError)
		return
	}
	defer fd.Close()

	image, err := imageMeta(fd)
	if err != nil {
		log.Error("cannot extract image metadata", "error", err.Error())
		web.StdJSONResp(w, http.StatusInternalServerError)
		return
	}

	// store image in database
	db := sq.DB(ctx)
	image, err = CreateImage(db, *image)
	switch err {
	case nil:
		// all good
	case sq.ErrConflict:
		// image already exists, nothing more to do here
		web.JSONResp(w, image, http.StatusOK)
		return
	default:
		log.Error("cannot create object", "error", err.Error())
		web.StdJSONResp(w, http.StatusInternalServerError)
		return
	}

	if _, err := fd.Seek(0, os.SEEK_SET); err != nil {
		log.Error("cannot seek image", "error", err.Error())
		web.StdJSONResp(w, http.StatusInternalServerError)
		return
	}
	fs := FileStore(ctx)
	if err := fs.Put(image, fd); err != nil {
		log.Error("cannot store image", "error", err.Error())
		web.StdJSONResp(w, http.StatusInternalServerError)
		return
	}
	log.Debug("image file created", "id", image.ImageID)
	web.JSONResp(w, image, http.StatusCreated)
}
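The image handlers above call FileStore(ctx), but its type is not included in the listing. Spelled out as an assumption rather than the actual definition, an interface matching those call sites would be:

// assumed shape of the value returned by FileStore(ctx); the concrete
// implementation (local directory, object storage, ...) is not shown
type imageFileStore interface {
	// Put writes the image content together with its metadata
	Put(img *Image, content io.ReadSeeker) error
	// PutMeta updates the stored metadata of an already written image
	PutMeta(img *Image) error
	// Read opens the content stored for the given year and image ID
	Read(year int, imageID string) (io.ReadCloser, error)
}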
func crawl(rp *redis.Pool, urlStr string, stopw map[string]struct{}) []string {
	log.Debug("crawling started", "url", urlStr)
	defer log.Debug("crawling done", "url", urlStr)

	resp, err := httpcli.Get(urlStr)
	if err != nil {
		log.Error("cannot GET", "url", urlStr, "error", err.Error())
		return nil
	}
	defer resp.Body.Close()

	if ct := resp.Header.Get("Content-Type"); !isHtml(ct) {
		log.Debug("non HTML resource", "url", urlStr, "contentType", ct)
		return nil
	}

	body, err := ioutil.ReadAll(io.LimitReader(resp.Body, 100000))
	if err != nil {
		log.Error("cannot read body", "url", urlStr, "error", err.Error())
		return nil
	}

	text := htmlToText(bytes.NewReader(body))
	key := fmt.Sprintf("article:%x", sha1.Sum(text))

	rc := rp.Get()
	defer rc.Close()

	if exists, err := redis.Bool(rc.Do("EXISTS", key)); err != nil {
		log.Error("cannot query database", "url", urlStr, "error", err.Error())
		return nil
	} else if exists {
		log.Debug("article already stored", "url", urlStr, "key", key)
		return nil
	}

	func() {
		// all redis updates are done in a single batch
		if err := rc.Send("MULTI"); err != nil {
			log.Error("cannot start MULTI", "error", err.Error())
			return
		}
		err = rc.Send("HMSET", key,
			"url", urlStr,
			"title", pageTitle(body),
			"created", time.Now().Unix())
		if err != nil {
			log.Error("cannot write article data", "key", key, "url", urlStr, "error", err.Error())
			return
		}
		for w, n := range words(bytes.NewReader(text), stopw) {
			if len(w) < 3 {
				continue
			}
			if err := rc.Send("ZADD", "word:"+w, n, key); err != nil {
				log.Error("cannot write word count",
					"key", key,
					"url", urlStr,
					"word", w,
					"error", err.Error())
				return
			}
		}
		if _, err := rc.Do("EXEC"); err != nil {
			log.Error("cannot flush redis command", "key", key, "url", urlStr, "error", err.Error())
			return
		}
	}()

	return pageUrls(body)
}
func main() {
	// rt is the application's router; its construction is not part of this
	// excerpt
	if err := http.ListenAndServe(":8000", rt); err != nil {
		log.Error("HTTP server error", "error", err.Error())
	}
}