示例#1
0
// GetDataFromPage runs every content pattern configured for urlName against
// the page source held in s.Html and collects the matches by content name.
//
// urlName selects the group of content patterns to apply and must not be
// empty. spider supplies the content names for urlName and the pattern
// belonging to each name.
//
// The returned KeyData holds one entry per content name, containing all
// submatches that name's pattern found in s.Html. An error is returned, and no
// data, when urlName is empty, spider is nil, or a content name has no pattern.
func (s *Searcher) GetDataFromPage(urlName string, spider *configure.Spider) (KeyData, error) {
	if urlName == "" {
		return nil, fmt.Errorf("URL's name can't empty!")
	}
	if spider == nil {
		return nil, fmt.Errorf("spider is nil")
	}

	// contentNames lists the names of the content patterns defined for urlName.
	contentNames := spider.GetContentNames(urlName)

	// One result entry per content name.
	data := make(KeyData, len(contentNames))
	for _, name := range contentNames {
		// Look up the pattern registered under this content name.
		re := spider.GetContentValue(urlName, name)
		if re == nil {
			// Calling a method on a missing pattern would panic; report it the
			// same way GetURLsFromPage reports a missing URL pattern.
			return nil, fmt.Errorf("no content pattern for %v in %q", name, urlName)
		}
		// Store everything the pattern matched under its content name.
		data[name] = re.FindAllStringSubmatch(s.Html, -1)
	}
	return data, nil
}
示例#2
0
// GetURLsFromPage extracts the configured URLs from the page source in s.Html
// and stores them in s.Urls, keyed by URL name.
//
// spider supplies the URL names and the pattern belonging to each name. Empty
// names are skipped.
//
// An error is returned when s.Html is empty, spider is nil, spider defines no
// URL names, or a name has no pattern. s.Urls is replaced with a fresh map
// before the patterns are looked up, so when a missing pattern aborts the loop
// it holds only the names processed up to that point.
func (s *Searcher) GetURLsFromPage(spider *configure.Spider) error {
	if s.Html == "" {
		return fmt.Errorf("Please get Html source first!")
	}
	if spider == nil {
		return fmt.Errorf("spider is nil")
	}

	urlNames := spider.GetURLName()
	if len(urlNames) == 0 {
		return fmt.Errorf("URL is empty!")
	}

	s.Urls = make(map[string][]string, len(urlNames))
	for _, name := range urlNames {
		// Skip blank entries that may be present in the slice.
		if name == "" {
			continue
		}
		re := spider.GetURLByName(name)
		if re == nil {
			// The original call passed name without a format verb, so the
			// name was never rendered properly in the message.
			return fmt.Errorf("There is nothing in %q", name)
		}
		s.Urls[name] = re.FindAllString(s.Html, -1)
	}

	return nil
}