Пример #1
0
// grabShopInfo grabs the page at shop_url and fills a ShopInfo with the
// shop's ID, owner, name and URL.
//
// It returns an error when the grabbed content is empty or cannot be parsed.
func (this TaoShop) grabShopInfo(shop_url string) (si ShopInfo, err error) {
	html := grab.GrabTaoHTML(shop_url)
	if html == "" {
		return si, errors.New("抓取数据为空")
	}

	root, err := grab.ParseNode(html)
	if err != nil {
		return si, err
	}

	if strings.Contains(shop_url, "tmall") {
		// Tmall URL: the shop ID is read from the parsed page.
		si.ShopId = grab.GetShopIdByShop(root)
	} else if m := regexp.MustCompile(`shop(\d+)\.`).FindStringSubmatch(shop_url); m != nil {
		// Otherwise the ID is the digit run in the "shop<digits>." part of
		// the URL. When the URL has no such part, ShopId keeps its zero value.
		si.ShopId = m[1]
	}

	si.ShopBoss = grab.GetShopBossByShop(html)
	si.ShopName = grab.GetShopNameByShop(root)
	si.ShopUrl = shop_url
	return si, nil
}
Пример #2
0
// AddGoodsInfo grabs the Taobao item page for gid, derives category,
// feature, attribute, sex/people and shop data from it, and upserts the
// result into the "goods" collection from a background goroutine.
//
// It returns a map with the keys "cid", "shop_id", "shop_name", "shop_url"
// and "shop_boss". It returns nil when the page is empty or cannot be
// parsed, the title marks the item as missing, access is restricted, or the
// category lookup fails.
func AddGoodsInfo(gid string) (info map[string]string) {
	url := "http://item.taobao.com/item.htm?id=" + gid
	grab.SetGrabCookie(cookie)
	h := grab.GrabTaoHTML(url)
	if h == "" {
		// Nothing was grabbed, so there is no page to inspect.
		return nil
	}
	p, parseErr := grab.ParseNode(h)
	if parseErr != nil {
		log.Println("parse item page failed, id:", gid, "-", parseErr)
		return nil
	}

	// Page title; specific titles identify "item does not exist" pages.
	title := grab.GetTitle(p)
	if title == "淘宝网 - 淘!我喜欢" || strings.Contains(title, "出错啦!") {
		return nil
	}

	if strings.Contains(title, "访问受限") {
		// Access restricted: back off for two minutes before giving up on
		// this item.
		log.Println("访问受限,id为", gid)
		time.Sleep(time.Minute * 2)
		return nil
	}

	// Category ID and its stored category record.
	cateId := grab.GetCategoryId(h)

	cateInfo, err := GetTaoCat(cateId)
	if err == mgo.ErrNotFound {
		// Unknown category: skip the item quietly.
		return nil
	}
	if err != nil {
		// Any other lookup failure would otherwise store a record without
		// category data.
		log.Println("category lookup failed, id:", gid, "-", cateId, "-", err)
		return nil
	}

	// Features: copy the category's feature map, tolerating the map and
	// numeric types a decoded document may carry instead of panicking on an
	// unexpected one.
	features := make(map[string]int)
	var rawFeatures map[string]interface{}
	switch fm := cateInfo["features"].(type) {
	case map[string]interface{}:
		rawFeatures = fm
	case bson.M:
		rawFeatures = fm
	}
	for name, raw := range rawFeatures {
		switch n := raw.(type) {
		case int:
			features[name] = n
		case int32:
			features[name] = int(n)
		case int64:
			features[name] = int(n)
		case float64:
			features[name] = int(n)
		}
	}

	// Attributes.
	attrbuites := grab.GetAttrbuites(p)

	// Sex: value of a sexmap keyword found in the title. If several keywords
	// match, which one wins depends on map iteration order.
	sex := 0
	for k, v := range sexmap {
		if strings.Contains(title, k) {
			sex = v
			break
		}
	}

	// People group: same keyword lookup against peoplemap.
	people := 0
	for k, v := range peoplemap {
		if strings.Contains(title, k) {
			people = v
			break
		}
	}

	// Shop information.
	shopId := grab.GetShopId(p)
	shopName := grab.GetShopName(p)
	shopUrl := grab.GetShopUrl(p)
	shopBoss := grab.GetShopBoss(p)

	// View count; every upsert writes it as 0.
	count := 0
	go func() {
		sess := GetSession()
		defer sess.Close()

		// NOTE(review): the key "shop_box" looks like a typo for "shop_boss"
		// (the key used in the returned map). It is left unchanged because
		// existing documents and readers may depend on it — confirm.
		_, upErr := sess.DB(modb).C("goods").Upsert(bson.M{"gid": gid}, bson.M{"$set": bson.M{
			"tagname": cateInfo["name"], "tagid": cateId, "features": features,
			"attrbuites": attrbuites, "sex": sex, "people": people,
			"shop_id": shopId, "shop_name": shopName, "shop_url": shopUrl,
			"shop_box": shopBoss, "count": count}})
		if upErr != nil {
			log.Println("upsert goods failed, id:", gid, "-", upErr)
		}
	}()

	return map[string]string{
		"cid":       cateId,
		"shop_id":   shopId,
		"shop_name": shopName,
		"shop_url":  shopUrl,
		"shop_boss": shopBoss,
	}
}
Пример #3
0
// GrabGoodsInfo grabs the Taobao item page for gid and returns the data
// derived from it: shop fields, category ("tagname"/"tagid"), features,
// attributes, sex and people.
//
// It returns nil when the page is empty or cannot be parsed, the title marks
// the item as missing, or the category ID is not present in cateList. When
// the title reports restricted access it sleeps for two minutes and fetches
// again, with no limit on the number of retries.
func GrabGoodsInfo(gid string) (info map[string]interface{}) {
	url := "https://item.taobao.com/item.htm?id=" + gid

	// Every path through the loop body returns, except the access-restricted
	// case, which waits and then retries the fetch from the start.
	for {
		grab.SetUserAgent(getUserAgent())
		//grab.SetTransport(getHttpProxy())
		h := grab.GrabTaoHTML(url)
		if h == "" {
			return nil
		}

		p, parseErr := grab.ParseNode(h)
		if parseErr != nil {
			log.Println("parse item page failed, id:", gid, "-", parseErr)
			return nil
		}

		// Page title; specific titles identify "item does not exist" pages.
		title := grab.GetTitle(p)
		if title == "淘宝网 - 淘!我喜欢" || strings.Contains(title, "出错啦!") {
			return nil
		}

		if strings.Contains(title, "访问受限") {
			log.Println("访问受限,id为", gid)
			time.Sleep(time.Minute * 2)
			continue
		}

		// Category ID and its record from cateList.
		cateId := grab.GetCategoryId(h)
		cateInfo, known := cateList[cateId]
		if !known {
			return nil
		}

		// Features: copy the category's feature map, skipping values of an
		// unexpected type instead of panicking on them.
		features := make(map[string]int)
		if rawFeatures, ok := cateInfo["features"].(map[string]interface{}); ok {
			for name, raw := range rawFeatures {
				switch n := raw.(type) {
				case int:
					features[name] = n
				case int32:
					features[name] = int(n)
				case int64:
					features[name] = int(n)
				case float64:
					features[name] = int(n)
				}
			}
		}

		// Attributes.
		attrbuites := grab.GetAttrbuites(p)

		// Sex: value of a sexmap keyword found in the title. If several
		// keywords match, which one wins depends on map iteration order.
		sex := 0
		for k, v := range sexmap {
			if strings.Contains(title, k) {
				sex = v
				break
			}
		}

		// People group: same keyword lookup against peoplemap.
		people := 0
		for k, v := range peoplemap {
			if strings.Contains(title, k) {
				people = v
				break
			}
		}

		// Shop information.
		shopId := grab.GetShopId(p)
		shopName := grab.GetShopName(p)
		shopUrl := grab.GetShopUrl(p)
		shopBoss := grab.GetShopBoss(p)

		return map[string]interface{}{
			"shop_id":    shopId,
			"shop_name":  shopName,
			"shop_url":   shopUrl,
			"shop_boss":  shopBoss,
			"gid":        gid,
			"tagname":    cateInfo["name"],
			"tagid":      cateId,
			"features":   features,
			"attrbuites": attrbuites,
			"sex":        sex,
			"people":     people,
		}
	}
}
Пример #4
0
// AddGoodsInfo grabs the Taobao item page for gid, derives category,
// feature, attribute, sex/people and shop data from it, and upserts the
// result into the "goods" collection from a background goroutine.
//
// It returns the item's category ID, or "" when the page is empty or cannot
// be parsed, the title marks the item as missing, or the category lookup
// fails.
func AddGoodsInfo(gid string) (cid string) {
	modb := IniFile.Section("mongo-xu_precise").Key("db").String()
	url := "http://item.taobao.com/item.htm?id=" + gid
	h := grab.GrabTaoHTML(url)
	if h == "" {
		// Nothing was grabbed, so there is no page to inspect.
		return ""
	}
	p, parseErr := grab.ParseNode(h)
	if parseErr != nil {
		log.Println("parse item page failed, id:", gid, "-", parseErr)
		return ""
	}

	// Page title; this exact title identifies an "item does not exist" page.
	title := grab.GetTitle(p)
	if title == "淘宝网 - 淘!我喜欢" {
		return ""
	}

	// Category ID and its stored category record.
	cateId := grab.GetCategoryId(h)
	cateInfo, err := GetTaoCat(cateId)
	switch {
	case err == mgo.ErrNotFound:
		log.Println("分类ID:", cateId, err)
		return ""
	case err != nil:
		// Any other lookup failure would otherwise store a record without
		// category data.
		log.Println("category lookup failed:", cateId, err)
		return ""
	}

	// Features: copy the category's feature map, tolerating the map and
	// numeric types a decoded document may carry instead of panicking on an
	// unexpected one.
	features := make(map[string]int)
	var rawFeatures map[string]interface{}
	switch fm := cateInfo["features"].(type) {
	case map[string]interface{}:
		rawFeatures = fm
	case bson.M:
		rawFeatures = fm
	}
	for name, raw := range rawFeatures {
		switch n := raw.(type) {
		case int:
			features[name] = n
		case int32:
			features[name] = int(n)
		case int64:
			features[name] = int(n)
		case float64:
			features[name] = int(n)
		}
	}

	// Attributes.
	attrbuites := grab.GetAttrbuites(p)

	// Sex: value of a sexmap keyword found in the title. If several keywords
	// match, which one wins depends on map iteration order.
	sex := 0
	for k, v := range sexmap {
		if strings.Contains(title, k) {
			sex = v
			break
		}
	}

	// People group: same keyword lookup against peoplemap.
	people := 0
	for k, v := range peoplemap {
		if strings.Contains(title, k) {
			people = v
			break
		}
	}

	// View count; every upsert writes it as 0.
	count := 0

	// Shop information.
	shopId := grab.GetShopId(p)
	shopName := grab.GetShopName(p)
	shopUrl := grab.GetShopUrl(p)
	shopBoss := grab.GetShopBoss(p)

	go func() {
		// The session is acquired here, not at the top of the function, so
		// the early returns above cannot leave one unclosed.
		sess := GetSession()
		defer sess.Close()

		_, upErr := sess.DB(modb).C("goods").Upsert(bson.M{"gid": gid}, bson.M{"gid": gid,
			"tagname": cateInfo["name"], "tagid": cateId, "features": features,
			"attrbuites": attrbuites, "sex": sex, "people": people, "shop_id": shopId,
			"shop_name": shopName, "shop_url": shopUrl, "shop_boss": shopBoss,
			"count": count,
		})
		if upErr != nil {
			log.Println("upsert goods failed, id:", gid, "-", upErr)
		}
	}()

	return cateId
}