// Run Stat Command Implementation func (c *StatCommand) Run(args []string) int { var startDate string var endDate string cmdFlags := flag.NewFlagSet("stat", flag.ContinueOnError) cmdFlags.Usage = func() { c.UI.Output(c.Help()) } env.ConfigFlags(cmdFlags) cmdFlags.StringVar(&startDate, "start", "", "Start Date.") cmdFlags.StringVar(&endDate, "end", "", "End Date.") if err := cmdFlags.Parse(args); err != nil { fmt.Println("Could not parse config: ", err) return 1 } environment, err := env.GetEnvironment() if err != nil { fmt.Println("Could not parse config ", err) return 1 } mysql, err = env.GetConnection(environment) if err != nil { fmt.Println("Could not Get DB Connection: ", err) return 1 } startTime := time.Now().Add(time.Hour * -24) endTime := time.Now().Add(time.Hour * -24) if startDate != "" { startTime = parseDate(startDate) } if endDate != "" { endTime = parseDate(endDate).Add(time.Hour * 24) } fixPostsWithNoGroup() for startTime.Before(endTime) { currentDate := times.Format("Y-m-d", startTime) fmt.Println("Running date: ", currentDate, " ...") //loop users statUsers(currentDate) //loop groups statGroups(currentDate) startTime = startTime.Add(time.Hour * 24) } return 0 }
//New make new Thread Object from database func (message *Message) New(db *sql.DB, id int) { stmt, _ := db.Prepare("SELECT * FROM threads WHERE id = ?") defer stmt.Close() var userID int var groupID int var nt mysql.NullTime stmt.QueryRow(id).Scan(&message.ID, &message.Title, &message.Content, &userID, &groupID, &nt) message.CreatedAt = times.Format("Y-m-d H:i:s", nt.Time) message.Author = NewUserBase(db, userID) message.Group = NewGroupBase(db, groupID) }
// TransferUrl 将外站图片URL转为本站,如果失败,返回原图 func (this *UploaderLogic) TransferUrl(ctx context.Context, origUrl string, prefixs ...string) (string, error) { if origUrl == "" || strings.Contains(origUrl, "studygolang") { return origUrl, errors.New("origin image is empty or is studygolang.com") } resp, err := http.Get(origUrl) if err != nil { return origUrl, errors.New("获取图片失败") } defer resp.Body.Close() buf, _ := ioutil.ReadAll(resp.Body) md5 := goutils.Md5Buf(buf) objImage, err := this.findImage(md5) if err != nil { logger.Errorln("find image:", md5, "error:", err) return origUrl, err } if objImage.Pid > 0 { return objImage.Path, nil } ext := filepath.Ext(origUrl) if ext == "" { contentType := http.DetectContentType(buf) exts, _ := mime.ExtensionsByType(contentType) if len(exts) > 0 { ext = exts[0] } } prefix := times.Format("ymd") if len(prefixs) > 0 { prefix = prefixs[0] } path := prefix + "/" + md5 + ext reader := bytes.NewReader(buf) if len(buf) > MaxImageSize { return origUrl, errors.New("文件太大") } err = this.uploadMemoryFile(reader, path) if err != nil { return origUrl, err } go this.saveImage(buf, path) return path, nil }
// Upload 上传图片 func (ImageController) Upload(ctx echo.Context) error { objLogger := getLogger(ctx) file, fileHeader, err := Request(ctx).FormFile("img") if err != nil { objLogger.Errorln("upload error:", err) return fail(ctx, 1, "非法文件上传!") } defer file.Close() // 如果是临时文件,存在硬盘中,则是 *os.File(大于32M),直接报错 if _, ok := file.(*os.File); ok { objLogger.Errorln("upload error:file too large!") return fail(ctx, 2, "文件太大!") } buf, err := ioutil.ReadAll(file) if err != nil { return fail(ctx, 3, "文件读取失败!") } if len(buf) > logic.MaxImageSize { return fail(ctx, 4, "文件太大!") } imgDir := times.Format("ymd") if ctx.FormValue("avatar") != "" { imgDir = "avatar" } path, err := logic.DefaultUploader.UploadImage(ctx, file, imgDir, buf, filepath.Ext(fileHeader.Filename)) if err != nil { return fail(ctx, 5, "文件上传失败!") } return success(ctx, map[string]interface{}{"uri": path}) }
// ParseArticle 获取 url 对应的文章并根据规则进行解析 func (ArticleLogic) ParseArticle(ctx context.Context, articleUrl string, auto bool) (*model.Article, error) { articleUrl = strings.TrimSpace(articleUrl) if !strings.HasPrefix(articleUrl, "http") { articleUrl = "http://" + articleUrl } tmpArticle := &model.Article{} _, err := MasterDB.Where("url=?", articleUrl).Get(tmpArticle) if err != nil || tmpArticle.Id != 0 { logger.Infoln(articleUrl, "has exists:", err) return nil, errors.New("has exists!") } urlPaths := strings.SplitN(articleUrl, "/", 5) domain := urlPaths[2] for k, v := range domainPatch { if strings.Contains(domain, k) && !strings.Contains(domain, "www."+k) { domain = v break } } rule := &model.CrawlRule{} _, err = MasterDB.Where("domain=?", domain).Get(rule) if err != nil { logger.Errorln("find rule by domain error:", err) return nil, err } if rule.Id == 0 { logger.Errorln("domain:", domain, "not exists!") return nil, errors.New("domain not exists") } var doc *goquery.Document if doc, err = goquery.NewDocument(articleUrl); err != nil { logger.Errorln("goquery newdocument error:", err) return nil, err } author, authorTxt := "", "" if rule.InUrl { index, err := strconv.Atoi(rule.Author) if err != nil { logger.Errorln("author rule is illegal:", rule.Author, "error:", err) return nil, err } author = urlPaths[index] authorTxt = author } else { if strings.HasPrefix(rule.Author, ".") || strings.HasPrefix(rule.Author, "#") { authorSelection := doc.Find(rule.Author) author, err = authorSelection.Html() if err != nil { logger.Errorln("goquery parse author error:", err) return nil, err } author = strings.TrimSpace(author) authorTxt = strings.TrimSpace(authorSelection.Text()) } else { // 某些个人博客,页面中没有作者的信息,因此,规则中 author 即为 作者 author = rule.Author authorTxt = rule.Author } } title := "" doc.Find(rule.Title).Each(func(i int, selection *goquery.Selection) { if title != "" { return } tmpTitle := strings.TrimSpace(selection.Text()) tmpTitle = strings.TrimSpace(strings.TrimPrefix(tmpTitle, "原")) tmpTitle = strings.TrimSpace(strings.TrimPrefix(tmpTitle, "荐")) tmpTitle = strings.TrimSpace(strings.TrimPrefix(tmpTitle, "转")) tmpTitle = strings.TrimSpace(strings.TrimPrefix(tmpTitle, "顶")) if tmpTitle != "" { title = tmpTitle } }) if title == "" { logger.Errorln("url:", articleUrl, "parse title error:", err) return nil, err } replacer := strings.NewReplacer("[置顶]", "", "[原]", "", "[转]", "") title = strings.TrimSpace(replacer.Replace(title)) contentSelection := doc.Find(rule.Content) // relative url -> abs url contentSelection.Find("img").Each(func(i int, s *goquery.Selection) { if v, ok := s.Attr("src"); ok { if !strings.HasPrefix(v, "http") { s.SetAttr("src", domain+v) } } }) content, err := contentSelection.Html() if err != nil { logger.Errorln("goquery parse content error:", err) return nil, err } content = strings.TrimSpace(content) txt := strings.TrimSpace(contentSelection.Text()) txt = articleRe.ReplaceAllLiteralString(txt, " ") txt = articleSpaceRe.ReplaceAllLiteralString(txt, " ") // 自动抓取,内容长度不能少于 300 字 if auto && len(txt) < 300 { logger.Errorln(articleUrl, "content is short") return nil, errors.New("content is short") } if auto && strings.Count(txt, "http://") > 10 { logger.Errorln(articleUrl, "content contains too many link!") return nil, errors.New("content contains too many link") } pubDate := times.Format("Y-m-d H:i:s") if rule.PubDate != "" { pubDate = strings.TrimSpace(doc.Find(rule.PubDate).First().Text()) // oschina patch re := regexp.MustCompile("[0-9]{4}-[0-9]{2}-[0-9]{2} [0-9]{2}:[0-9]{2}") submatches := re.FindStringSubmatch(pubDate) if len(submatches) > 0 { pubDate = submatches[0] } else { // oschina 多少之前忽略 pubDate = "" } } if pubDate == "" { pubDate = times.Format("Y-m-d H:i:s") } else { // YYYYY-MM-dd HH:mm if len(pubDate) == 16 && auto { // 三个月之前不入库 pubTime, err := time.ParseInLocation("2006-01-02 15:04", pubDate, time.Local) if err == nil { if pubTime.Add(3 * 30 * 86400 * time.Second).Before(time.Now()) { return nil, errors.New("article is old!") } } } } article := &model.Article{ Domain: domain, Name: rule.Name, Author: author, AuthorTxt: authorTxt, Title: title, Content: content, Txt: txt, PubDate: pubDate, Url: articleUrl, Lang: rule.Lang, } _, err = MasterDB.Insert(article) if err != nil { logger.Errorln("insert article error:", err) return nil, err } return article, nil }
func doRankHandler(w http.ResponseWriter, r *http.Request) { type Rank struct { Rank int `json:"rank"` Target interface{} `json:"target"` Value int `json:"value"` } type RankItem struct { ID schema.Stat `json:"id"` Title string `json:"title"` Items []Rank `json:"items"` } type RankResult struct { Date string `json:"date"` UserRanks []RankItem `json:"user_ranks"` GroupRanks []RankItem `json:"group_ranks"` } statDate := times.Format("Y-m-d", time.Now().Add(time.Hour*-24)) rankResult := RankResult{} rankResult.Date = statDate environment, _ := env.GetEnvironment() redis := goredis.Client{} redis.Addr = environment.RedisServer + ":" + strconv.Itoa(environment.RedisPort) redisKey := "spored:ranks" redisValue, _ := redis.Get(redisKey) if redisValue == nil || string(redisValue) == "" { //select target date related user stats userWhereDict := map[schema.Stat]string{ schema.StatThreads: "1970-01-02", schema.StatPosts: "1970-01-04", schema.StatLives: "1970-01-06", schema.StatAttachments: "1970-01-08", } sql := "SELECT owner_id, stat_value FROM stats WHERE date = ? AND stat_type = ? AND owner_type = ? ORDER BY stat_value DESC LIMIT ?" stmt, _ := mysql.Prepare(sql) defer stmt.Close() for statType, date := range userWhereDict { rankSeq := 0 userRank := RankItem{} userRank.ID = statType switch statType { case 2: userRank.Title = "用户帖子数排行" case 4: userRank.Title = "用户回复数排行" case 6: userRank.Title = "用户活跃版数排行" case 8: userRank.Title = "用户附件数排行" } rows, _ := stmt.Query(date, statType, schema.OwnerUser, 50) for rows.Next() { rankSeq++ var ownerID, statValue int rank := Rank{} rows.Scan(&ownerID, &statValue) rank.Rank = rankSeq rank.Value = statValue user := schema.User{} user.New(mysql, ownerID) rank.Target = user userRank.Items = append(userRank.Items, rank) } rankResult.UserRanks = append(rankResult.UserRanks, userRank) } //select target date related group stats groupWhereDict := map[schema.Stat]string{ schema.StatThreads: "1970-01-02", schema.StatPosts: "1970-01-04", schema.StatLives: "1970-01-06", } for statType, date := range groupWhereDict { rankSeq := 0 groupRank := RankItem{} groupRank.ID = statType switch statType { case 2: groupRank.Title = "群组帖子数排行" case 4: groupRank.Title = "群组回复数排行" case 6: groupRank.Title = "群组活跃用户版数排行" } rows, _ := stmt.Query(date, statType, schema.OwnerGroup, 20) for rows.Next() { rankSeq++ var ownerID, statValue int rank := Rank{} rows.Scan(&ownerID, &statValue) rank.Rank = rankSeq rank.Value = statValue group := schema.Group{} group.New(mysql, ownerID) rank.Target = group groupRank.Items = append(groupRank.Items, rank) } rankResult.GroupRanks = append(rankResult.GroupRanks, groupRank) } resultJSON, _ := json.Marshal(rankResult) fmt.Fprintf(w, string(resultJSON)) redis.Set(redisKey, resultJSON) redis.Expire(redisKey, times.StrToLocalTime(times.Format("Y-m-d", time.Now().Add(24*time.Hour))+" 01:00:00").Unix()-time.Now().Unix()) return } fmt.Fprintf(w, string(redisValue)) }