func (s *Searcher) GetDataFromPage(urlName string, spider *configure.Spider) (KeyData, error) { if urlName == "" { return nil, fmt.Errorf("URL's name can't empty!") } // contentName is a slice contentNames := spider.GetContentNames(urlName) // 初始化用于保存结果的map data := make(KeyData, len(contentNames)) for _, name := range contentNames { // 获取每一个名字对应的正则表达式 re := spider.GetContentValue(urlName, name) // 将匹配到的内容匹配到对应的名称下 data[name] = re.FindAllStringSubmatch(s.Html, -1) } return data, nil }
// 从页面中获取指定的URL func (s *Searcher) GetURLsFromPage(spider *configure.Spider) error { if s.Html == "" { return fmt.Errorf("Please get Html source first!") } urlnames := spider.GetURLName() if len(urlnames) == 0 { return fmt.Errorf("URL is empty!") } s.Urls = make(map[string][]string, len(urlnames)) for _, name := range urlnames { // 忽略slice中可能产生的气泡 if name == "" { continue } re := spider.GetURLByName(name) if re == nil { return fmt.Errorf("There is nothing in ", name) } s.Urls[name] = re.FindAllString(s.Html, -1) } return nil }