Пример #1
0
// Run implements the stat command. It parses the "start" and "end" flags,
// loads the environment, opens the database connection and then runs the
// user and group statistics once per 24-hour step between the start and end
// times.
//
// Without flags a single step starting 24 hours ago is processed. A given
// start date replaces the start bound; a given end date is made inclusive by
// adding 24 hours to the parsed value.
//
// It returns 0 on success and 1 when flag parsing, environment loading or
// the database connection fails.
func (c *StatCommand) Run(args []string) int {
	var startDate, endDate string

	cmdFlags := flag.NewFlagSet("stat", flag.ContinueOnError)
	cmdFlags.Usage = func() { c.UI.Output(c.Help()) }
	env.ConfigFlags(cmdFlags)

	cmdFlags.StringVar(&startDate, "start", "", "Start Date.")
	cmdFlags.StringVar(&endDate, "end", "", "End Date.")

	if err := cmdFlags.Parse(args); err != nil {
		fmt.Println("Could not parse config: ", err)
		return 1
	}

	environment, err := env.GetEnvironment()
	if err != nil {
		fmt.Println("Could not parse config ", err)
		return 1
	}

	// mysql is not declared in this function; the connection is stored in the
	// variable of that name from the enclosing scope.
	mysql, err = env.GetConnection(environment)
	if err != nil {
		fmt.Println("Could not Get DB Connection: ", err)
		return 1
	}

	// Default window: one step starting 24 hours ago. Both bounds come from a
	// single clock reading, and the end is placed strictly after the start so
	// the loop below always runs once. Two separate time.Now() calls would
	// only work if the clock happened to advance between them.
	yesterday := time.Now().Add(-24 * time.Hour)
	startTime := yesterday
	endTime := yesterday.Add(time.Nanosecond)

	if startDate != "" {
		startTime = parseDate(startDate)
	}

	if endDate != "" {
		// Add a day so the end date itself is still processed.
		endTime = parseDate(endDate).Add(time.Hour * 24)
	}

	fixPostsWithNoGroup()

	for startTime.Before(endTime) {
		currentDate := times.Format("Y-m-d", startTime)

		fmt.Println("Running date: ", currentDate, " ...")

		// loop users
		statUsers(currentDate)
		// loop groups
		statGroups(currentDate)

		startTime = startTime.Add(time.Hour * 24)
	}

	return 0
}
Пример #2
0
// New fills message from the row of the threads table whose id equals id.
//
// The row is read with SELECT *, so the columns must arrive in the order of
// the Scan destinations below. The author and group are then loaded with
// NewUserBase and NewGroupBase from the scanned ids.
//
// The method has no error return: when the row cannot be read, the scanned
// fields keep whatever values they had and the remaining fields are derived
// from zero values.
func (message *Message) New(db *sql.DB, id int) {
	var (
		userID  int
		groupID int
		nt      mysql.NullTime
	)

	// QueryRow never returns nil and defers every error to Scan. A separately
	// prepared statement would be nil after a failed Prepare and panic on use.
	row := db.QueryRow("SELECT * FROM threads WHERE id = ?", id)

	// Best effort: the signature offers no way to report the error, so a
	// failed scan is deliberately ignored and the fields below are still set.
	_ = row.Scan(&message.ID, &message.Title, &message.Content, &userID, &groupID, &nt)

	message.CreatedAt = times.Format("Y-m-d H:i:s", nt.Time)
	message.Author = NewUserBase(db, userID)
	message.Group = NewGroupBase(db, groupID)
}
Пример #3
0
// TransferUrl converts the URL of an image hosted on another site into a
// path on this site. On every failure the original URL is returned together
// with an error, so callers can keep using the original image.
//
// The image is downloaded and hashed with goutils.Md5Buf. When findImage
// returns a record with a positive Pid for that hash, the record's Path is
// returned and nothing is uploaded. Otherwise the image is uploaded under
// "<prefix>/<md5><ext>", where prefix is prefixs[0] when given and
// times.Format("ymd") otherwise, and saveImage is started in a goroutine.
//
// ctx is currently unused.
func (this *UploaderLogic) TransferUrl(ctx context.Context, origUrl string, prefixs ...string) (string, error) {
	if origUrl == "" || strings.Contains(origUrl, "studygolang") {
		return origUrl, errors.New("origin image is empty or is studygolang.com")
	}

	resp, err := http.Get(origUrl)
	if err != nil {
		return origUrl, errors.New("获取图片失败")
	}
	defer resp.Body.Close()

	// A non-2xx response carries an error page, not the image; do not hash
	// and upload it.
	if resp.StatusCode < 200 || resp.StatusCode >= 300 {
		return origUrl, errors.New("获取图片失败")
	}

	// A failed read leaves a truncated body that must not be stored.
	buf, err := ioutil.ReadAll(resp.Body)
	if err != nil {
		return origUrl, errors.New("获取图片失败")
	}

	sum := goutils.Md5Buf(buf)
	objImage, err := this.findImage(sum)
	if err != nil {
		logger.Errorln("find image:", sum, "error:", err)
		return origUrl, err
	}

	if objImage.Pid > 0 {
		return objImage.Path, nil
	}

	// Reject oversized images before doing any further work on them.
	if len(buf) > MaxImageSize {
		return origUrl, errors.New("文件太大")
	}

	// NOTE(review): filepath.Ext runs on the whole URL, so a query string
	// after the file name ends up in the extension — confirm whether callers
	// pass such URLs.
	ext := filepath.Ext(origUrl)
	if ext == "" {
		// No extension in the URL: derive one from the sniffed content type.
		contentType := http.DetectContentType(buf)
		exts, _ := mime.ExtensionsByType(contentType)
		if len(exts) > 0 {
			ext = exts[0]
		}
	}

	prefix := times.Format("ymd")
	if len(prefixs) > 0 {
		prefix = prefixs[0]
	}
	dstPath := prefix + "/" + sum + ext

	if err = this.uploadMemoryFile(bytes.NewReader(buf), dstPath); err != nil {
		return origUrl, err
	}

	go this.saveImage(buf, dstPath)

	return dstPath, nil
}
Пример #4
0
// Upload handles an image upload sent in the "img" form field.
//
// The file is read fully into memory and handed to
// logic.DefaultUploader.UploadImage. The target directory is "avatar" when
// the "avatar" form value is non-empty and times.Format("ymd") otherwise.
// Failures are reported through fail with codes 1-5; on success the response
// carries the stored path under the "uri" key.
func (ImageController) Upload(ctx echo.Context) error {
	lg := getLogger(ctx)

	upload, header, err := Request(ctx).FormFile("img")
	if err != nil {
		lg.Errorln("upload error:", err)
		return fail(ctx, 1, "非法文件上传!")
	}
	defer upload.Close()

	// A multipart file that was spooled to a temporary file on disk is an
	// *os.File (larger than 32M); reject it outright.
	if _, onDisk := upload.(*os.File); onDisk {
		lg.Errorln("upload error:file too large!")
		return fail(ctx, 2, "文件太大!")
	}

	data, err := ioutil.ReadAll(upload)
	switch {
	case err != nil:
		return fail(ctx, 3, "文件读取失败!")
	case len(data) > logic.MaxImageSize:
		return fail(ctx, 4, "文件太大!")
	}

	dir := times.Format("ymd")
	if avatar := ctx.FormValue("avatar"); avatar != "" {
		dir = "avatar"
	}

	ext := filepath.Ext(header.Filename)
	uri, err := logic.DefaultUploader.UploadImage(ctx, upload, dir, data, ext)
	if err != nil {
		return fail(ctx, 5, "文件上传失败!")
	}

	return success(ctx, map[string]interface{}{"uri": uri})
}
Пример #5
0
// ParseArticle fetches the article at articleUrl, parses it according to the
// crawl rule stored for its domain and inserts the result into MasterDB.
//
// articleUrl is trimmed and prefixed with "http://" when it does not already
// start with "http". When auto is true, additional filters apply: the text
// must be at least 300 bytes long, must not contain more than 10 "http://"
// occurrences, and a publication date older than roughly three months is
// rejected.
//
// It returns the inserted article, or nil and a non-nil error when the
// article already exists, no rule matches, fetching or parsing fails, a
// filter rejects the article, or the insert fails.
func (ArticleLogic) ParseArticle(ctx context.Context, articleUrl string, auto bool) (*model.Article, error) {
	articleUrl = strings.TrimSpace(articleUrl)
	if !strings.HasPrefix(articleUrl, "http") {
		articleUrl = "http://" + articleUrl
	}

	// A lookup error is reported the same way as an existing article.
	tmpArticle := &model.Article{}
	_, err := MasterDB.Where("url=?", articleUrl).Get(tmpArticle)
	if err != nil || tmpArticle.Id != 0 {
		logger.Infoln(articleUrl, "has exists:", err)
		return nil, errors.New("has exists!")
	}

	// "scheme://host/..." splits into [scheme:, "", host, ...]; anything
	// shorter has no host part and would panic on urlPaths[2].
	urlPaths := strings.SplitN(articleUrl, "/", 5)
	if len(urlPaths) < 3 {
		logger.Errorln("url:", articleUrl, "is illegal")
		return nil, errors.New("article url is illegal")
	}
	domain := urlPaths[2]

	for k, v := range domainPatch {
		if strings.Contains(domain, k) && !strings.Contains(domain, "www."+k) {
			domain = v
			break
		}
	}

	rule := &model.CrawlRule{}
	_, err = MasterDB.Where("domain=?", domain).Get(rule)
	if err != nil {
		logger.Errorln("find rule by domain error:", err)
		return nil, err
	}

	if rule.Id == 0 {
		logger.Errorln("domain:", domain, "not exists!")
		return nil, errors.New("domain not exists")
	}

	var doc *goquery.Document
	if doc, err = goquery.NewDocument(articleUrl); err != nil {
		logger.Errorln("goquery newdocument error:", err)
		return nil, err
	}

	author, authorTxt := "", ""
	if rule.InUrl {
		// The rule's author value is an index into the split URL.
		index, err := strconv.Atoi(rule.Author)
		if err != nil {
			logger.Errorln("author rule is illegal:", rule.Author, "error:", err)
			return nil, err
		}
		if index < 0 || index >= len(urlPaths) {
			logger.Errorln("author rule is illegal:", rule.Author, "url:", articleUrl)
			return nil, errors.New("author rule index out of range")
		}
		author = urlPaths[index]
		authorTxt = author
	} else {
		if strings.HasPrefix(rule.Author, ".") || strings.HasPrefix(rule.Author, "#") {
			authorSelection := doc.Find(rule.Author)
			author, err = authorSelection.Html()
			if err != nil {
				logger.Errorln("goquery parse author error:", err)
				return nil, err
			}

			author = strings.TrimSpace(author)
			authorTxt = strings.TrimSpace(authorSelection.Text())
		} else {
			// Some personal blogs carry no author information in the page,
			// so the rule's author value is the author itself.
			author = rule.Author
			authorTxt = rule.Author
		}
	}

	// Use the first matched element whose text is non-empty after the
	// marker prefixes have been stripped.
	title := ""
	doc.Find(rule.Title).Each(func(i int, selection *goquery.Selection) {
		if title != "" {
			return
		}

		tmpTitle := strings.TrimSpace(selection.Text())
		tmpTitle = strings.TrimSpace(strings.TrimPrefix(tmpTitle, "原"))
		tmpTitle = strings.TrimSpace(strings.TrimPrefix(tmpTitle, "荐"))
		tmpTitle = strings.TrimSpace(strings.TrimPrefix(tmpTitle, "转"))
		tmpTitle = strings.TrimSpace(strings.TrimPrefix(tmpTitle, "顶"))
		if tmpTitle != "" {
			title = tmpTitle
		}
	})

	if title == "" {
		// err is nil here, so a dedicated error is needed; returning err
		// would hand the caller a nil article together with a nil error.
		logger.Errorln("url:", articleUrl, "parse title error: title is empty")
		return nil, errors.New("title is empty")
	}

	replacer := strings.NewReplacer("[置顶]", "", "[原]", "", "[转]", "")
	title = strings.TrimSpace(replacer.Replace(title))

	contentSelection := doc.Find(rule.Content)

	// relative url -> abs url
	// NOTE(review): domain is the bare host (or its domainPatch value) with
	// no scheme, so the result is not a complete URL — confirm this is what
	// consumers of the content expect.
	contentSelection.Find("img").Each(func(i int, s *goquery.Selection) {
		if v, ok := s.Attr("src"); ok {
			if !strings.HasPrefix(v, "http") {
				s.SetAttr("src", domain+v)
			}
		}
	})

	content, err := contentSelection.Html()
	if err != nil {
		logger.Errorln("goquery parse content error:", err)
		return nil, err
	}
	content = strings.TrimSpace(content)
	txt := strings.TrimSpace(contentSelection.Text())
	txt = articleRe.ReplaceAllLiteralString(txt, " ")
	txt = articleSpaceRe.ReplaceAllLiteralString(txt, " ")

	// Automatic crawling: the content must not be shorter than 300
	// (len counts bytes, not characters).
	if auto && len(txt) < 300 {
		logger.Errorln(articleUrl, "content is short")
		return nil, errors.New("content is short")
	}

	if auto && strings.Count(txt, "http://") > 10 {
		logger.Errorln(articleUrl, "content contains too many link!")
		return nil, errors.New("content contains too many link")
	}

	pubDate := times.Format("Y-m-d H:i:s")
	if rule.PubDate != "" {
		pubDate = strings.TrimSpace(doc.Find(rule.PubDate).First().Text())

		// oschina patch
		re := regexp.MustCompile("[0-9]{4}-[0-9]{2}-[0-9]{2} [0-9]{2}:[0-9]{2}")
		submatches := re.FindStringSubmatch(pubDate)
		if len(submatches) > 0 {
			pubDate = submatches[0]
		} else {
			// oschina: relative dates ("some time ago") are ignored
			pubDate = ""
		}
	}

	if pubDate == "" {
		pubDate = times.Format("Y-m-d H:i:s")
	} else {
		// YYYY-MM-dd HH:mm
		if len(pubDate) == 16 && auto {
			// Articles older than three months are not stored.
			pubTime, err := time.ParseInLocation("2006-01-02 15:04", pubDate, time.Local)
			if err == nil {
				if pubTime.Add(3 * 30 * 86400 * time.Second).Before(time.Now()) {
					return nil, errors.New("article is old!")
				}
			}
		}
	}

	article := &model.Article{
		Domain:    domain,
		Name:      rule.Name,
		Author:    author,
		AuthorTxt: authorTxt,
		Title:     title,
		Content:   content,
		Txt:       txt,
		PubDate:   pubDate,
		Url:       articleUrl,
		Lang:      rule.Lang,
	}

	_, err = MasterDB.Insert(article)
	if err != nil {
		logger.Errorln("insert article error:", err)
		return nil, err
	}

	return article, nil
}
Пример #6
0
// doRankHandler writes the user and group rankings as JSON.
//
// The payload is served from the redis key "spored:ranks" when that key holds
// a non-empty value. Otherwise the rankings are read from the stats table
// (top 50 per user statistic, top 20 per group statistic), written to the
// response and cached in redis with an expiry at 01:00:00 of the next day.
//
// When the environment, the prepared statement, a query, a row scan or the
// JSON encoding fails, the handler answers with 500 and caches nothing.
func doRankHandler(w http.ResponseWriter, r *http.Request) {

	type Rank struct {
		Rank   int         `json:"rank"`
		Target interface{} `json:"target"`
		Value  int         `json:"value"`
	}

	type RankItem struct {
		ID    schema.Stat `json:"id"`
		Title string      `json:"title"`
		Items []Rank      `json:"items"`
	}

	type RankResult struct {
		Date       string     `json:"date"`
		UserRanks  []RankItem `json:"user_ranks"`
		GroupRanks []RankItem `json:"group_ranks"`
	}

	// statQuery pairs a statistic type with the date value its rows are
	// selected by.
	type statQuery struct {
		statType schema.Stat
		date     string
	}

	statDate := times.Format("Y-m-d", time.Now().Add(time.Hour*-24))

	rankResult := RankResult{}
	rankResult.Date = statDate

	environment, err := env.GetEnvironment()
	if err != nil {
		http.Error(w, "could not load environment", http.StatusInternalServerError)
		return
	}
	redis := goredis.Client{}
	redis.Addr = environment.RedisServer + ":" + strconv.Itoa(environment.RedisPort)
	redisKey := "spored:ranks"

	// A failed cache read is treated like a cache miss.
	redisValue, _ := redis.Get(redisKey)
	if redisValue != nil && string(redisValue) != "" {
		// Fprint, not Fprintf: the payload is data, not a format string.
		fmt.Fprint(w, string(redisValue))
		return
	}

	query := "SELECT owner_id, stat_value FROM stats WHERE date = ? AND stat_type = ? AND owner_type = ? ORDER BY stat_value DESC LIMIT ?"
	stmt, err := mysql.Prepare(query)
	if err != nil {
		http.Error(w, "could not prepare rank query", http.StatusInternalServerError)
		return
	}
	defer stmt.Close()

	// loadRanks runs the prepared query and turns every row into a Rank whose
	// target is built by newTarget from the row's owner id. It returns a nil
	// slice when there are no rows.
	loadRanks := func(q statQuery, ownerType interface{}, limit int, newTarget func(ownerID int) interface{}) ([]Rank, error) {
		rows, err := stmt.Query(q.date, q.statType, ownerType, limit)
		if err != nil {
			return nil, err
		}
		defer rows.Close()

		var items []Rank
		for rows.Next() {
			var ownerID, statValue int
			if err := rows.Scan(&ownerID, &statValue); err != nil {
				return nil, err
			}
			items = append(items, Rank{
				Rank:   len(items) + 1,
				Target: newTarget(ownerID),
				Value:  statValue,
			})
		}
		return items, rows.Err()
	}

	// select target date related user stats
	// A slice rather than a map keeps the order of the rankings in the
	// response stable between requests.
	userQueries := []statQuery{
		{schema.StatThreads, "1970-01-02"},
		{schema.StatPosts, "1970-01-04"},
		{schema.StatLives, "1970-01-06"},
		{schema.StatAttachments, "1970-01-08"},
	}

	for _, q := range userQueries {
		userRank := RankItem{}
		userRank.ID = q.statType
		switch q.statType {
		case 2:
			userRank.Title = "用户帖子数排行"
		case 4:
			userRank.Title = "用户回复数排行"
		case 6:
			userRank.Title = "用户活跃版数排行"
		case 8:
			userRank.Title = "用户附件数排行"
		}

		items, err := loadRanks(q, schema.OwnerUser, 50, func(ownerID int) interface{} {
			user := schema.User{}
			user.New(mysql, ownerID)
			return user
		})
		if err != nil {
			http.Error(w, "could not load user ranks", http.StatusInternalServerError)
			return
		}
		userRank.Items = items
		rankResult.UserRanks = append(rankResult.UserRanks, userRank)
	}

	// select target date related group stats
	groupQueries := []statQuery{
		{schema.StatThreads, "1970-01-02"},
		{schema.StatPosts, "1970-01-04"},
		{schema.StatLives, "1970-01-06"},
	}

	for _, q := range groupQueries {
		groupRank := RankItem{}
		groupRank.ID = q.statType
		switch q.statType {
		case 2:
			groupRank.Title = "群组帖子数排行"
		case 4:
			groupRank.Title = "群组回复数排行"
		case 6:
			groupRank.Title = "群组活跃用户版数排行"
		}

		items, err := loadRanks(q, schema.OwnerGroup, 20, func(ownerID int) interface{} {
			group := schema.Group{}
			group.New(mysql, ownerID)
			return group
		})
		if err != nil {
			http.Error(w, "could not load group ranks", http.StatusInternalServerError)
			return
		}
		groupRank.Items = items
		rankResult.GroupRanks = append(rankResult.GroupRanks, groupRank)
	}

	resultJSON, err := json.Marshal(rankResult)
	if err != nil {
		http.Error(w, "could not encode ranks", http.StatusInternalServerError)
		return
	}
	fmt.Fprint(w, string(resultJSON))

	// Caching is best effort: the response has already been written, so
	// failures of Set and Expire are not reported.
	redis.Set(redisKey, resultJSON)
	// Expire at 01:00:00 local time of the next day.
	redis.Expire(redisKey, times.StrToLocalTime(times.Format("Y-m-d", time.Now().Add(24*time.Hour))+" 01:00:00").Unix()-time.Now().Unix())
}