Exemplo n.º 1
0
// GetURLsFromPage extracts the URLs configured on spider from the page source
// held in s.Html and stores them in s.Urls, keyed by URL name.
//
// For each non-empty name returned by spider.GetURLName, the matcher returned
// by spider.GetURLByName(name) is run over s.Html and all of its matches are
// recorded under that name.
// NOTE(review): the matcher is presumably a *regexp.Regexp; confirm against
// the configure package.
//
// It returns an error when s.Html is empty, when the spider has no URL names,
// or when no matcher is registered for one of the names. In the last case
// s.Urls has already been replaced and may hold the results for the names
// processed before the failure.
func (s *Searcher) GetURLsFromPage(spider *configure.Spider) error {
	if s.Html == "" {
		return fmt.Errorf("Please get Html source first!")
	}
	urlnames := spider.GetURLName()
	if len(urlnames) == 0 {
		return fmt.Errorf("URL is empty!")
	}

	// Any previous results are discarded; the map is pre-sized for one entry
	// per configured name.
	s.Urls = make(map[string][]string, len(urlnames))
	for _, name := range urlnames {
		// Skip empty entries ("holes") that may appear in the slice.
		if name == "" {
			continue
		}
		re := spider.GetURLByName(name)
		if re == nil {
			// The name needs an explicit verb; without one fmt appends
			// "%!(EXTRA string=...)" to the message instead of the name.
			return fmt.Errorf("There is nothing in %s", name)
		}
		// -1 asks for every match rather than capping the count.
		s.Urls[name] = re.FindAllString(s.Html, -1)
	}

	return nil
}