func parse2(s string) Fang { var fa Fang path := strings.Split(s, "/") raw_url := path[len(path)-1] raw_url = strings.Replace(raw_url, "___", "://", 1) raw_url = strings.Replace(raw_url, "_", "/", -1) fa.Url = raw_url dat, err := ioutil.ReadFile(s) check(err) var parser page_analysis.HtmlParser parser.RegisterSelector("title", func(i int, s *goquery.Selection) { fa.Title = strings.TrimSpace(s.Text()) }) parser.RegisterSelector("div.xiaoqu", func(i int, s *goquery.Selection) { str := string_util.Purify(s.Text(), "\n", "\t", " ") fmt.Println(str) fa.Community = str }) parser.RegisterSelector("span.pay-method", func(i int, s *goquery.Selection) { str := string_util.Purify(s.Text(), "\n", "\t", " ") fmt.Println(str) fa.PayMethod = str }) parser.RegisterSelector("em.house-price", func(i int, s *goquery.Selection) { str := string_util.Purify(s.Text(), "\n", "\t", " ") fmt.Println(str) fa.Cost = str }) parser.RegisterSelector("div.house-type", func(i int, s *goquery.Selection) { str := string_util.Purify(s.Text(), "\n", "\t", " ") fmt.Println(str) fa.HouseType = str }) parser.RegisterSelectorWithTextKeyWord("span.pl10", "更新时间", func(i int, s *goquery.Selection) { str := string_util.Purify(s.Text(), "\n", "\t", " ") fmt.Println(str) fa.UpdateTime = str }) parser.RegisterSelectorWithTextKeyWord("li.house-primary-content-li", "房屋", func(i int, s *goquery.Selection) { fa.HouseType2 = string_util.Purify(s.Text(), "\n", "\t", " ") }) parser.RegisterSelectorWithTextKeyWord("li.house-primary-content-li", "地址", func(i int, s *goquery.Selection) { fa.Address = string_util.Purify(s.Text(), "\n", "\t", " ") }) parser.RegisterSelectorWithTextKeyWord("li.house-primary-content-li", "配置", func(i int, s *goquery.Selection) { fa.Configuration = string_util.Purify(s.Text(), "\n", "\t", " ") }) parser.RegisterRegex(`baidulon:'(\d+\.\d+)'`, func(i int, r []string) { fa.BaiduLongitude = r[1] }) parser.RegisterRegex(`baidulat:'(\d+\.\d+)'`, func(i int, r []string) { fa.BaiduLatitude = r[1] }) parser.RegisterRegex(`,lon:'(\d+\.\d+)'`, func(i int, r []string) { fa.Longitude = r[1] }) parser.RegisterRegex(`,lat:'(\d+\.\d+)'`, func(i int, r []string) { fa.Latitude = r[1] }) parser.Parse(raw_url, string(dat)) return fa }
func parse(s string) Fang { var fa Fang path := strings.Split(s,"/") raw_url := path[len(path)-1] raw_url = strings.Replace(raw_url, "___","://", 1) raw_url = strings.Replace(raw_url, "_","/", -1) fa.Url = raw_url dat, err := ioutil.ReadFile(s) check(err) var parser page_analysis.HtmlParser parser.Parse(raw_url,string(dat)) doc := parser.GetDocument() doc.Find("title").Each(func(i int, s *goquery.Selection){ fa.Title = strings.TrimSpace(s.Text()) }) doc.Find("div.xiaoqu").Each(func(i int, s *goquery.Selection){ str := strings.Replace(s.Text(),"\n","",-1) str = strings.Replace(str,"\t","",-1) str = strings.Replace(str," ","",-1) fmt.Println(str) fa.Community = str }) doc.Find("span.pay-method").Each(func(i int, s *goquery.Selection){ str := strings.Replace(s.Text(),"\n","",-1) str = strings.Replace(str,"\t","",-1) str = strings.Replace(str," ","",-1) fmt.Println(str) fa.PayMethod = str }) doc.Find("em.house-price").Each(func(i int, s *goquery.Selection){ str := strings.Replace(s.Text(),"\n","",-1) str = strings.Replace(str,"\t","",-1) str = strings.Replace(str," ","",-1) fmt.Println(str) fa.Cost = str }) doc.Find("div.house-type").Each(func(i int, s *goquery.Selection){ str := strings.Replace(s.Text(),"\n","",-1) str = strings.Replace(str,"\t","",-1) str = strings.Replace(str," ","",-1) fmt.Println(str) fa.HouseType = str }) doc.Find("span.pl10").Each(func(i int, s *goquery.Selection){ str := strings.Replace(s.Text(),"\n","",-1) str = strings.Replace(str,"\t","",-1) str = strings.Replace(str," ","",-1) fmt.Println(str) if strings.Contains(str, "更新时间") { fa.UpdateTime = str } }) doc.Find("li.house-primary-content-li").Each(func(i int, s *goquery.Selection){ str := strings.Replace(s.Text(),"\n","",-1) str = strings.Replace(str,"\t","",-1) str = strings.Replace(str," ","",-1) fmt.Println(str) if strings.Contains(str, "租金") { // fa.CostFull = str } else if strings.Contains(str, "房屋") { fa.HouseType2 = str } else if strings.Contains(str, "地址") { fa.Address = str } else if strings.Contains(str, "配置") { fa.Configuration = str } }) l, _ := regexp.Compile(`baidulon:'(\d+\.\d+)'`) baiduLong := l.FindStringSubmatch(string(dat)) if len(baiduLong) > 1 { fa.BaiduLongitude = baiduLong[1] } r, _ := regexp.Compile(`baidulat:'(\d+\.\d+)'`) baiduLat := r.FindStringSubmatch(string(dat)) if len(baiduLat) > 1 { fa.BaiduLatitude = baiduLat[1] } ll, _ := regexp.Compile(`,lon:'(\d+\.\d+)'`) longs := ll.FindStringSubmatch(string(dat)) if len(longs) > 1 { fa.Longitude = longs[1] } rr, _ := regexp.Compile(`,lat:'(\d+\.\d+)'`) lat := rr.FindStringSubmatch(string(dat)) if len(lat) > 1 { fa.Latitude = lat[1] } return fa }