Example #1
0
func MakeStockPriceSpider() *spiders.Spider {
	spider := spiders.Spider{}
	spider.Name = "stock_price_spider"
	urlPrefix := "http://money.finance.sina.com.cn/corp/go.php/vMS_MarketHistory/stockid/"
	spider.BeforeMethod = func() {
		db.DefaultMysqlConnectMap.InitConnection(spider.Name, "root:@/wxstock?charset=utf8")
		rows, err := db.DefaultMysqlConnectMap.Query(spider.Name, "select id,code from db_stock")
		if err != nil {
			log.Println(err)
		} else {
			defer rows.Close()
			spider.ExtData = make(map[string]interface{})
			codeIdMap := make(map[string]int)
			spider.ExtData["codeIdMap"] = codeIdMap
			for rows.Next() {
				var id int
				var code string
				rows.Scan(&id, &code)
				codeIdMap[code] = id
			}
		}
		priceRow, priceErr := db.DefaultMysqlConnectMap.Query(spider.Name, "select id,date from db_stock_price")
		if priceErr != nil {
			log.Println(err)
		} else {
			defer priceRow.Close()
			existMap := make(map[string]bool)
			spider.ExtData["existMap"] = existMap
			for priceRow.Next() {
				var id int
				var date string
				priceRow.Scan(&id, &date)
				existMap[strconv.Itoa(id)+":"+date] = true
			}
		}
		maxDateRow, maxErr := db.DefaultMysqlConnectMap.Query(spider.Name, "select max(date),stock_id from db_stock_price group by stock_id")
		if maxErr != nil {
			log.Println(err)
		} else {
			defer maxDateRow.Close()
			maxDataMap := make(map[int]string)
			spider.ExtData["maxDataMap"] = maxDataMap
			for maxDateRow.Next() {
				var maxDate string
				var stockId int
				maxDateRow.Scan(&maxDate, &stockId)
				maxDataMap[stockId] = maxDate
			}
		}
	}
	spider.AfterMethod = func() {
		db.DefaultMysqlConnectMap.CloseContection(spider.Name)
	}
	spider.StartUrls = []string{"http://money.finance.sina.com.cn/corp/go.php/vMS_MarketHistory/stockid/600570.phtml?year=2015&jidu=2"}
	spider.ParseMap = make(map[string]func(response *http.Response) ([]*http.Request, error))
	spider.ParseMap[spiders.BASE_PARSE_NAME] = func(response *http.Response) ([]*http.Request, error) {
		doc, err := goquery.NewDocumentFromReader(strings.NewReader(response.Body))
		if err != nil {
			log.Println(err)
			return nil, err
		}
		code := response.Request.GoRequest.URL.Path[len("/corp/go.php/vMS_MarketHistory/stockid/"):len("/corp/go.php/vMS_MarketHistory/stockid/600570")]
		doc.Find("#FundHoldSharesTable tr").Each(func(index int, selection *goquery.Selection) {
			if index == 0 {
				return
			}
			date := strings.TrimSpace(selection.First().Find("div a").Text())
			if len(date) == 0 {
				return
			}
			priceString := strings.TrimSpace(selection.Find("td:nth-child(3) div").Text())
			priceFloat, _ := strconv.ParseFloat(priceString, 32)
			price := int(priceFloat * 1000)
			id := (spider.ExtData["codeIdMap"]).(map[string]int)[code]
			_, ok := (spider.ExtData["existMap"]).(map[string]bool)[strconv.Itoa(id)+":"+date]
			if !ok {
				_, err := db.DefaultMysqlConnectMap.Exec(spider.Name, "insert into db_stock_price(gmt_create,creator,stock_id,date,price)values(now(),'go', ? , ? , ? )", id, date, price)
				if err != nil {
					log.Println(err)
				}
			}
		})
		return nil, nil
	}
	spider.InitStartUrls = func() {
		spider.StartUrls = make([]string, 0)
		today := time.Now()
		year := today.Year()
		month := today.Month()
		jidu := int(math.Ceil(float64(month) / float64(3)))
		codeIdMap := (spider.ExtData["codeIdMap"]).(map[string]int)
		maxDataMap := (spider.ExtData["maxDataMap"]).(map[int]string)
		for code, id := range codeIdMap {
			stringData, ok := maxDataMap[id]
			var maxYear int
			var maxJidu int
			if ok {
				maxDate, _ := time.Parse("2006-01-02", stringData)
				maxYear = maxDate.Year()
				maxMonth := maxDate.Month()
				maxJidu = int(math.Ceil(float64(maxMonth) / float64(3)))
			} else {
				maxYear = year
				maxJidu = jidu
			}
			var i int = 1
			if maxYear >= year {
				i = maxJidu
				for ; i <= jidu; i++ {
					spider.StartUrls = append(spider.StartUrls, urlPrefix+code+".phtml?year="+strconv.Itoa(year)+"&jidu="+strconv.Itoa(i))
				}
			} else {
				for ; i <= jidu; i++ {
					spider.StartUrls = append(spider.StartUrls, urlPrefix+code+".phtml?year="+strconv.Itoa(year)+"&jidu="+strconv.Itoa(i))
				}
				if i < maxJidu {
					i = maxJidu
				}
				for ; i < 5; i++ {
					spider.StartUrls = append(spider.StartUrls, urlPrefix+code+".phtml?year="+strconv.Itoa(year-1)+"&jidu="+strconv.Itoa(i))
				}
			}
		}
	}
	return &spider
}