Ejemplo n.º 1
0
// parse2 builds a Fang record from the saved HTML page stored at path s.
//
// The text after the last "/" in s is treated as an encoded URL: its first
// "___" is turned back into "://" and every remaining "_" into "/". The
// decoded URL is stored in Fang.Url and is also handed to the parser together
// with the file contents.
//
// All other fields are filled by callbacks registered on a
// page_analysis.HtmlParser before Parse is invoked. Several of those callbacks
// also print the extracted text to stdout. A read error is passed to check,
// which is defined outside this block (presumably it aborts; confirm).
func parse2(s string) Fang {
	var result Fang

	// Everything after the final slash is the encoded page URL.
	encoded := s[strings.LastIndex(s, "/")+1:]
	pageURL := strings.Replace(encoded, "___", "://", 1)
	pageURL = strings.Replace(pageURL, "_", "/", -1)
	result.Url = pageURL

	content, err := ioutil.ReadFile(s)
	check(err)

	// clean runs a selection's text through string_util.Purify with newline,
	// tab and space as arguments (presumably removing them; confirm against
	// string_util).
	clean := func(sel *goquery.Selection) string {
		return string_util.Purify(sel.Text(), "\n", "\t", " ")
	}
	// echo is clean plus a print of the result to stdout.
	echo := func(sel *goquery.Selection) string {
		text := clean(sel)
		fmt.Println(text)
		return text
	}

	var parser page_analysis.HtmlParser

	// Plain CSS selectors.
	parser.RegisterSelector("title", func(_ int, sel *goquery.Selection) {
		result.Title = strings.TrimSpace(sel.Text())
	})
	parser.RegisterSelector("div.xiaoqu", func(_ int, sel *goquery.Selection) {
		result.Community = echo(sel)
	})
	parser.RegisterSelector("span.pay-method", func(_ int, sel *goquery.Selection) {
		result.PayMethod = echo(sel)
	})
	parser.RegisterSelector("em.house-price", func(_ int, sel *goquery.Selection) {
		result.Cost = echo(sel)
	})
	parser.RegisterSelector("div.house-type", func(_ int, sel *goquery.Selection) {
		result.HouseType = echo(sel)
	})

	// Selectors registered together with a text keyword.
	parser.RegisterSelectorWithTextKeyWord("span.pl10", "更新时间", func(_ int, sel *goquery.Selection) {
		result.UpdateTime = echo(sel)
	})
	parser.RegisterSelectorWithTextKeyWord("li.house-primary-content-li", "房屋", func(_ int, sel *goquery.Selection) {
		result.HouseType2 = clean(sel)
	})
	parser.RegisterSelectorWithTextKeyWord("li.house-primary-content-li", "地址", func(_ int, sel *goquery.Selection) {
		result.Address = clean(sel)
	})
	parser.RegisterSelectorWithTextKeyWord("li.house-primary-content-li", "配置", func(_ int, sel *goquery.Selection) {
		result.Configuration = clean(sel)
	})

	// Coordinate patterns; element 1 of the callback argument is stored.
	parser.RegisterRegex(`baidulon:'(\d+\.\d+)'`, func(_ int, groups []string) {
		result.BaiduLongitude = groups[1]
	})
	parser.RegisterRegex(`baidulat:'(\d+\.\d+)'`, func(_ int, groups []string) {
		result.BaiduLatitude = groups[1]
	})
	parser.RegisterRegex(`,lon:'(\d+\.\d+)'`, func(_ int, groups []string) {
		result.Longitude = groups[1]
	})
	parser.RegisterRegex(`,lat:'(\d+\.\d+)'`, func(_ int, groups []string) {
		result.Latitude = groups[1]
	})

	parser.Parse(pageURL, string(content))
	return result
}
Ejemplo n.º 2
0
// Patterns for the coordinate values parse extracts from the raw page text.
// They are compiled once at package scope, so parse neither recompiles them on
// every call nor discards a compile error.
var (
	fangBaiduLonRe = regexp.MustCompile(`baidulon:'(\d+\.\d+)'`)
	fangBaiduLatRe = regexp.MustCompile(`baidulat:'(\d+\.\d+)'`)
	fangLonRe      = regexp.MustCompile(`,lon:'(\d+\.\d+)'`)
	fangLatRe      = regexp.MustCompile(`,lat:'(\d+\.\d+)'`)
)

// parse builds a Fang record from the saved HTML page stored at path s.
//
// The text after the last "/" in s is treated as an encoded URL: its first
// "___" is turned back into "://" and every remaining "_" into "/". The
// decoded URL is stored in Fang.Url and passed to the parser along with the
// file contents.
//
// Element text is read from the parsed document with newlines, tabs and spaces
// removed; every selector except "title" also prints that text to stdout. The
// four coordinate fields are taken from the first match of each pattern in the
// raw file contents and are left untouched when there is no match.
//
// A read error is passed to check, which is defined outside this block
// (presumably it aborts; confirm).
//
// NOTE(review): ioutil.ReadFile is kept because the file's import block is not
// visible from here; os.ReadFile is its replacement since Go 1.16.
func parse(s string) Fang {
	var fa Fang

	// Everything after the final slash is the encoded page URL.
	rawURL := s[strings.LastIndex(s, "/")+1:]
	rawURL = strings.Replace(rawURL, "___", "://", 1)
	rawURL = strings.Replace(rawURL, "_", "/", -1)
	fa.Url = rawURL

	dat, err := ioutil.ReadFile(s)
	check(err)
	// Convert once; the parser and all four regex searches share this string.
	html := string(dat)

	var parser page_analysis.HtmlParser
	parser.Parse(rawURL, html)
	doc := parser.GetDocument()

	// strip removes newlines, tabs and spaces in a single pass.
	strip := strings.NewReplacer("\n", "", "\t", "", " ", "")
	// compact returns a selection's stripped text and prints it to stdout.
	compact := func(sel *goquery.Selection) string {
		text := strip.Replace(sel.Text())
		fmt.Println(text)
		return text
	}

	doc.Find("title").Each(func(_ int, sel *goquery.Selection) {
		fa.Title = strings.TrimSpace(sel.Text())
	})
	doc.Find("div.xiaoqu").Each(func(_ int, sel *goquery.Selection) {
		fa.Community = compact(sel)
	})
	doc.Find("span.pay-method").Each(func(_ int, sel *goquery.Selection) {
		fa.PayMethod = compact(sel)
	})
	doc.Find("em.house-price").Each(func(_ int, sel *goquery.Selection) {
		fa.Cost = compact(sel)
	})
	doc.Find("div.house-type").Each(func(_ int, sel *goquery.Selection) {
		fa.HouseType = compact(sel)
	})
	doc.Find("span.pl10").Each(func(_ int, sel *goquery.Selection) {
		if text := compact(sel); strings.Contains(text, "更新时间") {
			fa.UpdateTime = text
		}
	})
	doc.Find("li.house-primary-content-li").Each(func(_ int, sel *goquery.Selection) {
		text := compact(sel)
		switch {
		case strings.Contains(text, "租金"):
			// Deliberately not stored (the CostFull assignment is disabled).
			// The case stays first so a row containing this keyword is never
			// classified by the cases below.
		case strings.Contains(text, "房屋"):
			fa.HouseType2 = text
		case strings.Contains(text, "地址"):
			fa.Address = text
		case strings.Contains(text, "配置"):
			fa.Configuration = text
		}
	})

	// Coordinates come from the raw text; m[1] is the captured number.
	if m := fangBaiduLonRe.FindStringSubmatch(html); len(m) > 1 {
		fa.BaiduLongitude = m[1]
	}
	if m := fangBaiduLatRe.FindStringSubmatch(html); len(m) > 1 {
		fa.BaiduLatitude = m[1]
	}
	if m := fangLonRe.FindStringSubmatch(html); len(m) > 1 {
		fa.Longitude = m[1]
	}
	if m := fangLatRe.FindStringSubmatch(html); len(m) > 1 {
		fa.Latitude = m[1]
	}
	return fa
}