// 获取抓取的文章列表(分页) func FindArticles(lastId, limit string) []*model.Article { article := model.NewArticle() cond := "status IN(0,1)" if lastId != "0" { cond += " AND id<" + lastId } articleList, err := article.Where(cond).Order("id DESC").Limit(limit). FindAll() if err != nil { logger.Errorln("article service FindArticles Error:", err) return nil } topArticles, err := article.Where("top=?", 1).Order("id DESC").FindAll() if err != nil { logger.Errorln("article service Find Top Articles Error:", err) return nil } if len(topArticles) > 0 { articleList = append(topArticles, articleList...) } return articleList }
// 更新该文章的喜欢数 // objid:被喜欢对象id;num: 喜欢数(负数表示取消喜欢) func (self ArticleLike) UpdateLike(objid, num int) { // 更新喜欢数(TODO:暂时每次都更新表) err := model.NewArticle().Where("id=?", objid).Increment("likenum", num) if err != nil { logger.Errorln("更新文章喜欢数失败:", err) } }
// 博文总数 func ArticlesTotal() (total int) { total, err := model.NewArticle().Count() if err != nil { logger.Errorln("article service ArticlesTotal error:", err) } return }
// 索引博文 func IndexingArticle(isAll bool) { solrClient := NewSolrClient() articleObj := model.NewArticle() limit := strconv.Itoa(MaxRows) if isAll { id := 0 for { articleList, err := articleObj.Where("id>? AND status!=?", id, model.StatusOffline).Limit(limit).FindAll() if err != nil { logger.Errorln("IndexingArticle error:", err) break } if len(articleList) == 0 { break } for _, article := range articleList { if id < article.Id { id = article.Id } document := model.NewDocument(article, nil) addCommand := model.NewDefaultArgsAddCommand(document) solrClient.Push(addCommand) } solrClient.Post() } } }
// 更新该文章的评论信息 // cid:评论id;objid:被评论对象id;uid:评论者;cmttime:评论时间 func (self ArticleComment) UpdateComment(cid, objid, uid int, cmttime string) { id := strconv.Itoa(objid) // 更新评论数(TODO:暂时每次都更新表) err := model.NewArticle().Where("id="+id).Increment("cmtnum", 1) if err != nil { logger.Errorln("更新文章评论数失败:", err) } }
// 获取单条博文 func FindArticleById(id string) (*model.Article, error) { article := model.NewArticle() err := article.Where("id=" + id).Find() if err != nil { logger.Errorln("article service FindArticleById Error:", err) } return article, err }
// 获取多个文章详细信息 func FindArticlesByIds(ids []int) []*model.Article { if len(ids) == 0 { return nil } inIds := util.Join(ids, ",") articles, err := model.NewArticle().Where("id in(" + inIds + ")").FindAll() if err != nil { logger.Errorln("article service FindArticlesByIds error:", err) return nil } return articles }
// flush 将浏览数刷入数据库中 func (this *view) flush() { this.locker.Lock() defer this.locker.Unlock() objid := strconv.Itoa(this.objid) switch this.objtype { case model.TYPE_TOPIC: model.NewTopicEx().Where("tid="+objid).Increment("view", this.num) case model.TYPE_ARTICLE: model.NewArticle().Where("id="+objid).Increment("viewnum", this.num) case model.TYPE_RESOURCE: model.NewResourceEx().Where("id="+objid).Increment("viewnum", this.num) case model.TYPE_PROJECT: model.NewOpenProject().Where("id="+objid).Increment("viewnum", this.num) } this.num = 0 }
// 获取最新的10篇博文,随机展示3篇 func FindNewBlogs() []*model.Article { if len(articles) != 0 && startTime.Sub(time.Now()) < keepDuration { rnd := rand.Intn(len(articles) - 3) return articles[rnd : rnd+3] } startTime = time.Now() articleList, err := model.NewArticle().Where("post_status=publish and post_type=post").Order("post_date DESC").Limit("0, 10").FindAll() if err != nil { logger.Errorln("获取博客文章失败") return nil } // 内容截取一部分 for _, article := range articleList { t, _ := time.Parse("2006-01-02 15:04:05", article.PostDate) article.PostUri = t.Format("2006/01") + "/" + article.PostName } articles = articleList rnd := rand.Intn(len(articles) - 3) return articles[rnd : rnd+3] }
// 获取当前(id)博文以及前后博文 func FindArticlesById(idstr string) (curArticle *model.Article, prevNext []*model.Article, err error) { id := util.MustInt(idstr) cond := "id BETWEEN ? AND ? AND status!=2" articles, err := model.NewArticle().Where(cond, id-5, id+5).FindAll() if err != nil { logger.Errorln("article service FindArticlesById Error:", err) return } if len(articles) == 0 { return } prevNext = make([]*model.Article, 2) prevId, nextId := articles[0].Id, articles[len(articles)-1].Id for _, article := range articles { if article.Id < id && article.Id > prevId { prevId = article.Id prevNext[0] = article } else if article.Id > id && article.Id < nextId { nextId = article.Id prevNext[1] = article } else if article.Id == id { curArticle = article } } if prevId == id { prevNext[0] = nil } if nextId == id { prevNext[1] = nil } return }
// 修改文章信息 func ModifyArticle(user map[string]interface{}, form url.Values) (errMsg string, err error) { username := user["username"].(string) form.Set("op_user", username) fields := []string{ "title", "url", "cover", "author", "author_txt", "lang", "pub_date", "content", "tags", "status", "op_user", } query, args := updateSetClause(form, fields) id := form.Get("id") err = model.NewArticle().Set(query, args...).Where("id=" + id).Update() if err != nil { logger.Errorf("更新文章 【%s】 信息失败:%s\n", id, err) errMsg = "对不起,服务器内部错误,请稍后再试!" return } return }
// 获取抓取的文章列表(分页) func FindArticleByPage(conds map[string]string, curPage, limit int) ([]*model.Article, int) { conditions := make([]string, 0, len(conds)) for k, v := range conds { conditions = append(conditions, k+"="+v) } article := model.NewArticle() limitStr := strconv.Itoa((curPage-1)*limit) + "," + strconv.Itoa(limit) articleList, err := article.Where(strings.Join(conditions, " AND ")).Order("id DESC").Limit(limitStr). FindAll() if err != nil { logger.Errorln("article service FindArticleByPage Error:", err) return nil, 0 } total, err := article.Count() if err != nil { logger.Errorln("article service FindArticleByPage COUNT Error:", err) return nil, 0 } return articleList, total }
func GenSitemap() { sitemapFiles := []string{} // 首页 home := map[string]string{ "loc": "http://" + config.Config["domain"], "lastmode": time.Now().Format(time.RFC3339), } var ( little = 1 step = 4999 large = little + step ) // 文章 article := model.NewArticle() for { sitemapFile := "sitemap_article_" + strconv.Itoa(large) + ".xml" articles, err := article.Where("id BETWEEN ? AND ? AND status!=?", little, large, model.StatusOffline).FindAll("id", "mtime") little = large + 1 large = little + step if err != nil { continue } if len(articles) == 0 { break } data := map[string]interface{}{ "home": home, "articles": articles, } if err = output(sitemapFile, data); err == nil { sitemapFiles = append(sitemapFiles, sitemapFile) } } little = 1 large = little + step // 主题(帖子) topic := model.NewTopic() for { sitemapFile := "sitemap_topic_" + strconv.Itoa(large) + ".xml" topics, err := topic.Where("tid BETWEEN ? AND ? AND flag IN(?,?)", little, large, 0, 1).FindAll("tid", "mtime") little, large = large+1, little+step if err != nil { continue } if len(topics) == 0 { break } data := map[string]interface{}{ "home": home, "topics": topics, } if err = output(sitemapFile, data); err == nil { sitemapFiles = append(sitemapFiles, sitemapFile) } } little = 1 large = little + step // 资源 resource := model.NewResource() for { sitemapFile := "sitemap_resource_" + strconv.Itoa(large) + ".xml" resources, err := resource.Where("id BETWEEN ? AND ?", little, large).FindAll("id", "mtime") little, large = large+1, little+step if err != nil { continue } if len(resources) == 0 { break } data := map[string]interface{}{ "home": home, "resources": resources, } if err = output(sitemapFile, data); err == nil { sitemapFiles = append(sitemapFiles, sitemapFile) } } little = 1 large = little + step // 项目 project := model.NewOpenProject() for { sitemapFile := "sitemap_project_" + strconv.Itoa(large) + ".xml" projects, err := project.Where("id BETWEEN ? AND ? 
AND status=?", little, large, model.StatusOnline).FindAll("id", "uri", "mtime") little, large = large+1, little+step if err != nil { continue } if len(projects) == 0 { break } data := map[string]interface{}{ "home": home, "projects": projects, } if err = output(sitemapFile, data); err == nil { sitemapFiles = append(sitemapFiles, sitemapFile) } } little = 1 large = little + step // wiki wiki := model.NewWiki() for { sitemapFile := "sitemap_wiki_" + strconv.Itoa(large) + ".xml" wikis, err := wiki.Where("id BETWEEN ? AND ?", little, large).FindAll("id", "uri", "mtime") little, large = large+1, little+step if err != nil { continue } if len(wikis) == 0 { break } data := map[string]interface{}{ "home": home, "wikis": wikis, } if err = output(sitemapFile, data); err == nil { sitemapFiles = append(sitemapFiles, sitemapFile) } } file, err := os.Create(sitemapPath + "sitemapindex.xml") if err != nil { logger.Errorln("gen sitemap index file error:", err) return } defer file.Close() err = sitemapIndexTpl.Execute(file, map[string]interface{}{ "home": home, "sitemapFiles": sitemapFiles, }) if err != nil { logger.Errorln("execute sitemap index template error:", err) } }
// DelArticle deletes the article with the given id and returns the error of
// the delete call.
func DelArticle(id string) error {
	// Bind id as a query parameter rather than concatenating caller-supplied
	// text into the condition of a DELETE.
	return model.NewArticle().Where("id=?", id).Delete()
}
// 获取url对应的文章并根据规则进行解析 func ParseArticle(articleUrl string, auto bool) (*model.Article, error) { articleUrl = strings.TrimSpace(articleUrl) if !strings.HasPrefix(articleUrl, "http") { articleUrl = "http://" + articleUrl } tmpArticle := model.NewArticle() err := tmpArticle.Where("url=" + articleUrl).Find("id") if err != nil || tmpArticle.Id != 0 { logger.Errorln(articleUrl, "has exists:", err) return nil, errors.New("has exists!") } urlPaths := strings.SplitN(articleUrl, "/", 5) domain := urlPaths[2] for k, v := range domainPatch { if strings.Contains(domain, k) && !strings.Contains(domain, "www."+k) { domain = v break } } rule := model.NewCrawlRule() err = rule.Where("domain=" + domain).Find() if err != nil { logger.Errorln("find rule by domain error:", err) return nil, err } if rule.Id == 0 { logger.Errorln("domain:", domain, "not exists!") return nil, errors.New("domain not exists") } var doc *goquery.Document if doc, err = goquery.NewDocument(articleUrl); err != nil { logger.Errorln("goquery newdocument error:", err) return nil, err } author, authorTxt := "", "" if rule.InUrl { index, err := strconv.Atoi(rule.Author) if err != nil { logger.Errorln("author rule is illegal:", rule.Author, "error:", err) return nil, err } author = urlPaths[index] authorTxt = author } else { if strings.HasPrefix(rule.Author, ".") || strings.HasPrefix(rule.Author, "#") { authorSelection := doc.Find(rule.Author) author, err = authorSelection.Html() if err != nil { logger.Errorln("goquery parse author error:", err) return nil, err } author = strings.TrimSpace(author) authorTxt = strings.TrimSpace(authorSelection.Text()) } else { // 某些个人博客,页面中没有作者的信息,因此,规则中 author 即为 作者 author = rule.Author authorTxt = rule.Author } } title := "" doc.Find(rule.Title).Each(func(i int, selection *goquery.Selection) { if title != "" { return } tmpTitle := strings.TrimSpace(strings.TrimPrefix(selection.Text(), "原")) tmpTitle = strings.TrimSpace(strings.TrimPrefix(tmpTitle, "荐")) tmpTitle = 
strings.TrimSpace(strings.TrimPrefix(tmpTitle, "转")) tmpTitle = strings.TrimSpace(strings.TrimPrefix(tmpTitle, "顶")) if tmpTitle != "" { title = tmpTitle } }) if title == "" { logger.Errorln("url:", articleUrl, "parse title error:", err) return nil, err } replacer := strings.NewReplacer("[置顶]", "", "[原]", "", "[转]", "") title = strings.TrimSpace(replacer.Replace(title)) contentSelection := doc.Find(rule.Content) // relative url -> abs url contentSelection.Find("img").Each(func(i int, s *goquery.Selection) { if v, ok := s.Attr("src"); ok { if !strings.HasPrefix(v, "http") { s.SetAttr("src", domain+v) } } }) content, err := contentSelection.Html() if err != nil { logger.Errorln("goquery parse content error:", err) return nil, err } content = strings.TrimSpace(content) txt := strings.TrimSpace(contentSelection.Text()) txt = articleRe.ReplaceAllLiteralString(txt, " ") txt = articleSpaceRe.ReplaceAllLiteralString(txt, " ") // 自动抓取,内容长度不能少于 300 字 if auto && len(txt) < 300 { logger.Infoln(articleUrl, "content is short") return nil, errors.New("content is short") } pubDate := util.TimeNow() if rule.PubDate != "" { pubDate = strings.TrimSpace(doc.Find(rule.PubDate).First().Text()) // sochina patch re := regexp.MustCompile("[0-9]{4}-[0-9]{2}-[0-9]{2} [0-9]{2}:[0-9]{2}") submatches := re.FindStringSubmatch(pubDate) if len(submatches) > 0 { pubDate = submatches[0] } } if pubDate == "" { pubDate = util.TimeNow() } article := model.NewArticle() article.Domain = domain article.Name = rule.Name article.Author = author article.AuthorTxt = authorTxt article.Title = title article.Content = content article.Txt = txt article.PubDate = pubDate article.Url = articleUrl article.Lang = rule.Lang article.Ctime = util.TimeNow() _, err = article.Insert() if err != nil { logger.Errorln("insert article error:", err) return nil, err } return article, nil }
// 订阅邮件通知 func EmailNotice() { beginDate := time.Now().Add(-7 * 24 * time.Hour).Format("2006-01-02") endDate := time.Now().Add(-24 * time.Hour).Format("2006-01-02") beginTime := beginDate + " 00:00:00" // 本周晨读(过去 7 天) readings, err := model.NewMorningReading().Where("ctime>? AND rtype=0", beginTime).Order("id DESC").FindAll() if err != nil { logger.Errorln("find morning reading error:", err) } // 本周精彩文章 articles, err := model.NewArticle().Where("ctime>? AND status!=2", beginTime).Order("cmtnum DESC, likenum DESC, viewnum DESC").Limit("10").FindAll() if err != nil { logger.Errorln("find article error:", err) } // 本周热门主题 topics, err := model.NewTopic().Where("ctime>? AND flag IN(0,1)", beginTime).Order("tid DESC").Limit("10").FindAll() if err != nil { logger.Errorln("find topic error:", err) } data := map[string]interface{}{ "readings": readings, "articles": articles, "topics": topics, "beginDate": beginDate, "endDate": endDate, } // 给所有用户发送邮件 userModel := model.NewUser() var ( lastUid = 0 limit = "500" users []*model.User ) for { users, err = userModel.Where("uid>?", lastUid).Order("uid ASC").Limit(limit).FindAll() if err != nil { logger.Errorln("find user error:", err) continue } if len(users) == 0 { break } for _, user := range users { if user.Unsubscribe == 1 { logger.Infoln("user unsubscribe", user) continue } data["email"] = user.Email data["token"] = GenUnsubscribeToken(user) content, err := genEmailContent(data) if err != nil { logger.Errorln("from email.html gen email content error:", err) continue } SendMail("每周精选", content, []string{user.Email}) if lastUid < user.Uid { lastUid = user.Uid } // 控制发信速度 time.Sleep(30 * time.Second) } } }