示例#1
0
文件: jd.go 项目: qgweb/new
// Grab downloads the JD item page for gid and extracts its metadata.
//
// On success the returned map carries the keys "title", "cat_id", "cat_name",
// "brand", "attributes" and "gid". Grab returns nil when the downloaded HTML
// is empty, when the HTML cannot be parsed, or when the page yields fewer than
// two category fields, so callers only need to handle a single failure value.
func (this JDCrawl) Grab(gid string) map[string]interface{} {
	url := "http://item.jd.com/" + gid + ".html"
	h := grab.GrabJDHTML(url)
	if h == "" {
		return nil
	}

	// NOTE(review): assumes the second result of ParseNode is an error — confirm.
	p, err := grab.ParseNode(h)
	if err != nil {
		// A document that failed to parse must not be handed to the extractors.
		return nil
	}

	// Category: index 0 is stored as the name and index 1 as the id. Guard the
	// length so a page without category data cannot cause an index panic.
	cat := grab.GetJDCategory(p)
	if len(cat) < 2 {
		return nil
	}

	info := make(map[string]interface{}, 6)
	// Title
	info["title"] = grab.GetJDTitle(p)
	// Category
	info["cat_id"] = cat[1]
	info["cat_name"] = cat[0]
	// Brand
	info["brand"] = grab.GetJDBrand(p)
	// Attributes
	info["attributes"] = grab.GetJDAttributes(p)
	// Item id, echoed back unchanged
	info["gid"] = gid
	return info
}
示例#2
0
文件: tao.go 项目: qgweb/new
// Grab downloads the Taobao item page for gid and extracts shop and item
// metadata from it.
//
// On success the returned map carries the keys "shop_id", "shop_name",
// "shop_url", "shop_boss", "gid", "tagid", "features", "attrbuites", "sex",
// "people" and "brand". The "attrbuites" spelling is kept as-is so existing
// consumers of the map are unaffected.
//
// Grab returns nil when the downloaded HTML is empty, when it cannot be
// parsed, or when the page title marks the item as missing. When the title
// contains "访问受限" (access restricted) the call logs the event, sleeps for
// two minutes and fetches the page again. This retry has no upper bound, so
// Grab blocks until a page with a different title is fetched.
func (this TaobaoCrawl) Grab(gid string) map[string]interface{} {
	url := "https://item.taobao.com/item.htm?id=" + gid

	for {
		h := grab.GrabTaoHTML(url)
		if h == "" {
			return nil
		}

		// NOTE(review): assumes the second result of ParseNode is an error — confirm.
		p, err := grab.ParseNode(h)
		if err != nil {
			log.Error("页面解析失败,id为", gid, err)
			return nil
		}

		// Page title; also used below to detect error pages and to match keywords.
		title := grab.GetTitle(p)

		// These titles are treated as "item does not exist".
		if title == "淘宝网 - 淘!我喜欢" || strings.Contains(title, "出错啦!") {
			return nil
		}

		// Access restricted: wait, then fetch the page again.
		if strings.Contains(title, "访问受限") {
			log.Error("访问受限,id为", gid)
			time.Sleep(time.Minute * 2)
			continue
		}

		// Category id is read from the raw HTML rather than the parsed document.
		cateID := grab.GetCategoryId(h)

		// No features are extracted here; an empty map is still emitted.
		features := make(map[string]int)

		// Item attributes
		attrs := grab.GetAttrbuites(p)

		// Gender: value of the first sexmap key found in the title, 0 if none.
		// NOTE(review): if sexmap is a map and several keys match, which one
		// wins depends on iteration order — confirm this is acceptable.
		sex := 0
		for k, v := range sexmap {
			if strings.Contains(title, k) {
				sex = v
				break
			}
		}

		// Audience: same first-match lookup against peoplemap, 0 if none.
		people := 0
		for k, v := range peoplemap {
			if strings.Contains(title, k) {
				people = v
				break
			}
		}

		// Brand is derived from the attributes extracted above.
		brand := grab.GetBrand(attrs)

		// Shop information
		shopID := grab.GetShopId(p)
		shopName := grab.GetShopName(p)
		shopURL := grab.GetShopUrl(p)
		shopBoss := grab.GetShopBoss(p)

		return map[string]interface{}{
			"shop_id":    shopID,
			"shop_name":  shopName,
			"shop_url":   shopURL,
			"shop_boss":  shopBoss,
			"gid":        gid,
			"tagid":      cateID,
			"features":   features,
			"attrbuites": attrs,
			"sex":        sex,
			"people":     people,
			"brand":      brand,
		}
	}
}