//抓取店铺信息 func (this TaoShop) grabShopInfo(shop_url string) (si ShopInfo, err error) { var getTaoId = func(url string) string { reg, _ := regexp.Compile(`shop(\d+)\.`) res := reg.FindStringSubmatch(url) if len(res) >= 1 { return res[1] } return "" } var getShopId = func(url string, f func() string) string { //判断是否是天猫 if strings.Contains(url, "tmall") { return f() } return getTaoId(url) } h := grab.GrabTaoHTML(shop_url) if h == "" { return si, errors.New("抓取数据为空") } node, err := grab.ParseNode(h) if err != nil { return si, err } si.ShopId = getShopId(shop_url, func() string { return grab.GetShopIdByShop(node) }) si.ShopBoss = grab.GetShopBossByShop(h) si.ShopName = grab.GetShopNameByShop(node) si.ShopUrl = shop_url return si, nil }
//添加商品 func AddGoodsInfo(gid string) (info map[string]string) { url := "http://item.taobao.com/item.htm?id=" + gid grab.SetGrabCookie(cookie) h := grab.GrabTaoHTML(url) p, _ := grab.ParseNode(h) //标签名称 title := grab.GetTitle(p) if title == "淘宝网 - 淘!我喜欢" || strings.Contains(title, "出错啦!") { //log.Println("商品不存在,id为:", gid) return nil } if strings.Contains(title, "访问受限") { log.Println("访问受限,id为", gid) time.Sleep(time.Minute * 2) return nil } //标签id cateId := grab.GetCategoryId(h) cateInfo, err := GetTaoCat(cateId) if err == mgo.ErrNotFound { //log.Println("分类ID:", gid, "-", cateId, "-", err) return nil } //特性 features := make(map[string]int) if v, ok := cateInfo["features"]; ok { for a, b := range v.(map[string]interface{}) { features[a] = b.(int) } } //属性 attrbuites := grab.GetAttrbuites(p) //性别 sex := 0 for k, v := range sexmap { if strings.Contains(title, k) { sex = v break } } //人群 people := 0 for k, v := range peoplemap { if strings.Contains(title, k) { people = v break } } // 店铺信息 shopId := grab.GetShopId(p) shopName := grab.GetShopName(p) shopUrl := grab.GetShopUrl(p) shopBoss := grab.GetShopBoss(p) //浏览数 count := 0 go func() { sess := GetSession() defer func() { sess.Close() }() sess.DB(modb).C("goods").Upsert(bson.M{"gid": gid}, bson.M{"$set": bson.M{ "tagname": cateInfo["name"], "tagid": cateId, "features": features, "attrbuites": attrbuites, "sex": sex, "people": people, "shop_id": shopId, "shop_name": shopName, "shop_url": shopUrl, "shop_box": shopBoss, "count": count}}) }() return map[string]string{ "cid": cateId, "shop_id": shopId, "shop_name": shopName, "shop_url": shopUrl, "shop_boss": shopBoss, } }
//添加商品 func GrabGoodsInfo(gid string) (info map[string]interface{}) { LABEL: url := "https://item.taobao.com/item.htm?id=" + gid grab.SetUserAgent(getUserAgent()) //grab.SetTransport(getHttpProxy()) h := grab.GrabTaoHTML(url) if h == "" { return nil } p, _ := grab.ParseNode(h) //标签名称 title := grab.GetTitle(p) if title == "淘宝网 - 淘!我喜欢" || strings.Contains(title, "出错啦!") { //log.Println("商品不存在,id为:", gid) return nil } if strings.Contains(title, "访问受限") { log.Println("访问受限,id为", gid) time.Sleep(time.Minute * 2) goto LABEL return nil } //标签id cateId := grab.GetCategoryId(h) //标签信息 cateInfo := make(map[string]interface{}) if v, ok := cateList[cateId]; !ok { return nil } else { cateInfo = v } //特性 features := make(map[string]int) if v, ok := cateInfo["features"]; ok { for a, b := range v.(map[string]interface{}) { features[a] = b.(int) } } //属性 attrbuites := grab.GetAttrbuites(p) //性别 sex := 0 for k, v := range sexmap { if strings.Contains(title, k) { sex = v break } } //人群 people := 0 for k, v := range peoplemap { if strings.Contains(title, k) { people = v break } } // 店铺信息 shopId := grab.GetShopId(p) shopName := grab.GetShopName(p) shopUrl := grab.GetShopUrl(p) shopBoss := grab.GetShopBoss(p) return map[string]interface{}{ "shop_id": shopId, "shop_name": shopName, "shop_url": shopUrl, "shop_boss": shopBoss, "gid": gid, "tagname": cateInfo["name"], "tagid": cateId, "features": features, "attrbuites": attrbuites, "sex": sex, "people": people, } }
//添加商品 func AddGoodsInfo(gid string) (cid string) { modb := IniFile.Section("mongo-xu_precise").Key("db").String() url := "http://item.taobao.com/item.htm?id=" + gid h := grab.GrabTaoHTML(url) p, _ := grab.ParseNode(h) sess := GetSession() //标签名称 title := grab.GetTitle(p) if title == "淘宝网 - 淘!我喜欢" { return "" } //标签id cateId := grab.GetCategoryId(h) cateInfo, err := GetTaoCat(cateId) if err == mgo.ErrNotFound { log.Println("分类ID:", cateId, err) return "" } //特性 features := make(map[string]int) if v, ok := cateInfo["features"]; ok { for a, b := range v.(map[string]interface{}) { features[a] = b.(int) } } //属性 attrbuites := grab.GetAttrbuites(p) //性别 sex := 0 for k, v := range sexmap { if strings.Contains(title, k) { sex = v break } } //人群 people := 0 for k, v := range peoplemap { if strings.Contains(title, k) { people = v break } } //浏览数 count := 0 // 店铺信息 shopId := grab.GetShopId(p) shopName := grab.GetShopName(p) shopUrl := grab.GetShopUrl(p) shopBoss := grab.GetShopBoss(p) go func() { sess.DB(modb).C("goods").Upsert(bson.M{"gid": gid}, bson.M{"gid": gid, "tagname": cateInfo["name"], "tagid": cateId, "features": features, "attrbuites": attrbuites, "sex": sex, "people": people, "shop_id": shopId, "shop_name": shopName, "shop_url": shopUrl, "shop_boss": shopBoss, "count": count, }) sess.Close() }() return cateId }