func parseColors(s *goquery.Selection) string { colors := "" s.Each(func(i int, s *goquery.Selection) { colors += s.Text() }) return colors }
func parseTranslations(elements *goquery.Selection) (results []Translation) { elements.Each(func(index int, element *goquery.Selection) { results = append(results, Translation{parseMeaning(element), parseHref(element), parsePhrase(element)}) }) return }
func (this *parser) dropTag(selection *goquery.Selection) { selection.Each(func(i int, s *goquery.Selection) { node := s.Get(0) node.Data = s.Text() node.Type = html.TextNode }) }
func JoinNodesWithSpace(s *goquery.Selection) string { texts := []string{} s.Each(func(i int, s *goquery.Selection) { texts = append(texts, s.Text()) }) return strings.Join(texts, " ") }
func parseRegions(regionTags *goquery.Selection) []string { result := make([]string, 0, regionTags.Length()) regionTags.Each(func(n int, s *goquery.Selection) { result = append(result, s.Text()) }) return result }
func removeNodes(s *goquery.Selection) { s.Each(func(i int, s *goquery.Selection) { parent := s.Parent() if parent.Length() == 0 { // TODO??? } else { parent.Get(0).RemoveChild(s.Get(0)) } }) }
func filtraLigasP(seleccion *goquery.Selection) (planesGo []*goquery.Selection) { seleccion.Each(func(i int, anchor *goquery.Selection) { if attr, existe := anchor.Attr("href"); existe { if empata, _ := regexp.MatchString("/docencia/horarios/indiceplan/", attr); empata { planesGo = append(planesGo, anchor) } } }) return }
func (e *extractImages) hitCaches(imgs *goquery.Selection, attr string) []*Image { var hits []*Image imgs.Each(func(i int, img *goquery.Selection) { hit := e.hitCache(img, attr) if hit != nil { i := *hit i.Sel = img hits = append(hits, &i) } }) return hits }
func (f *follower) Links(sel *goquery.Selection) []string { if f.Selector != "" { sel = sel.Find(f.Selector) } var links []string sel.Each(func(i int, s *goquery.Selection) { for _, ext := range f.DataExtractors { links = append(links, ext.Extract(s)) } }) return links }
// Extract returns Items querying `sel` using ItemExtractor's DataExtractors func (e *ItemExtractor) Extract(sel *goquery.Selection) Items { var items Items if e.Selector != "" { sel = sel.Find(e.Selector) } sel.Each(func(i int, s *goquery.Selection) { item := make(Item) for name, ext := range e.DataExtractors { item[name] = ext.Extract(s) } items = append(items, item) }) return items }
func (ve *VideoExtractor) GetVideos(article *Article) *set.Set { doc := article.Doc var nodes *goquery.Selection for _, videoTag := range videoTags { tmpNodes := doc.Find(videoTag) if nodes == nil { nodes = tmpNodes } else { nodes.Union(tmpNodes) } } nodes.Each(func(i int, node *goquery.Selection) { tag := node.Get(0).DataAtom.String() var movie video switch tag { case "video": movie = ve.getVideoTag(node) break case "embed": movie = ve.getEmbedTag(node) break case "object": movie = ve.getObjectTag(node) break case "iframe": movie = ve.getIFrame(node) break default: { } } if movie.src != "" { ve.movies.Add(movie) } }) return ve.movies }
func pullMimes(w io.Writer, sel *goquery.Selection) { sel.Each(func(_ int, s *goquery.Selection) { tds := s.Find("td") style, ok := tds.Attr("style") if ok && strings.Contains(style, "cursor:") { return } extNode := tds.WrapNode(tds.Get(0)) html, _ := extNode.Html() ext := strings.Split(extNode.Text(), "\n") if len(ext) < 2 { return } var refs []string tds.WrapNode(tds.Get(2)).Find("a").Each(func(_ int, sel *goquery.Selection) { href, ok := sel.Attr("href") if !ok { return } if !strings.HasPrefix(href, "http") { return } refs = append(refs, fmt.Sprintf("%q", href)) }) if len(refs) > 0 { fmt.Fprintf(w, " AddExtensionType(%q, %q, %s)\n", html, strings.TrimSpace(ext[1]), strings.Join(refs, ",")) return } fmt.Fprintf(w, " AddExtensionType(%q, %q)\n", html, strings.TrimSpace(ext[1])) }) }
func Tianhuan(templete *Node, src *goquery.Selection, m map[string]interface{}) error { mySelect, properties, err := templete.GetSelect(SELECT) if err != nil { return err } var ser *goquery.Selection if mySelect == "" { fmt.Println("value-of 需要填写select属性") ser = src } else { ser = src.Find(mySelect) } var nodes = make([]*goquery.Selection, 0) if len(ser.Nodes) == 0 { return errors.New(mySelect + ":未搜索到数据") } if properties != nil { ser.EachWithBreak(func(i int, s *goquery.Selection) bool { if properties.Index != 0 { if properties.Index == i+1 { //index nodes = append(nodes, s) return false } return true } var lenn = len(s.Get(0).Attr) // var ma bool = false for b := 0; b < lenn; b++ { if _, ok := properties.Property[s.Get(0).Attr[b].Key]; ok { for _, v := range properties.Property[s.Get(0).Attr[b].Key] { if strings.Contains(v, ".*") { if ok, er := regexp.Match(v, []byte(s.Get(0).Attr[b].Val)); er != nil { return false } else { if ok { if Ifok(properties, s.Get(0), b) { nodes = append(nodes, s) } return true } } } if s.Get(0).Attr[b].Val == v { if Ifok(properties, s.Get(0), b) { nodes = append(nodes, s) return true } } } } } return true }) //abandon _, pro, err := templete.GetSelect(ABANDON) if err != nil { return err } if pro != nil { for k, _ := range pro.Property { nodes = nodesDelete(nodes, GetSelectsByValue(pro.Property[k], nodes, k)) } } } else { ser.Each(func(i int, s *goquery.Selection) { nodes = append(nodes, s) }) } //not 操作符 nodes = notInstructionCharacter(properties, nodes) fmt.Println("***************************") for _, vvv := range nodes { fmt.Println(vvv.Get(0).Attr, vvv.Get(0).Data) } fmt.Println("------------------------------") var index int = 0 for _, matchLabel := range templete.Children { switch matchLabel.LabelName { case STORAGE: //取值 if 0 != MatchMap(nodes, matchLabel, m, &index, nil) { break } case VALUEOF: //查找值 var bb *goquery.Selection var la = make([]*html.Node, 0) for _, ll := range nodes { la = append(la, ll.Get(0)) } bb.Nodes = la if err := Tianhuan(matchLabel, bb, m); err != nil { fmt.Println(err) } case LAYER: //给值加层级 if matchLabel.Attr[NAME] != "" { //判断是不是数组 是数组的话 新增一个[]map[string]interface{} 类型 m[matchLabel.Attr[NAME]] = make(map[string]interface{}) if matchLabel.Attr[ARRAY] == "true" || matchLabel.Attr[ARRAYEND] != "" { //判断数组什么时候结束 var property *Properties if matchLabel.Attr[ARRAYEND] != "" { _, property, err = matchLabel.GetSelect(ARRAYEND) if err != nil { inputLog.Println(err) break } arr := make([]map[string]interface{}, 0, 0) ToValueArray(nodes, matchLabel, &index, property, &arr) m[matchLabel.Attr[NAME]] = arr //数组 } //传递参数确认是否数组结束 } else { for i := 0; i < len(matchLabel.Children); i++ { if 0 != MatchMap(nodes, matchLabel.Children[i], m[matchLabel.Attr[NAME]].(map[string]interface{}), &index, nil) { break } } } } else { fmt.Println("未书写层级名称, 跳过此标签 和其他子标签") } default: fmt.Println("模板标签书写错误了吧") } } return nil }
func OutPutTianhuan(templete *Node, src *goquery.Selection, m map[string]interface{}) error { mySelect, properties, err := templete.GetSelect(SELECT) if err != nil { return err } var ser *goquery.Selection if mySelect == "" { fmt.Println("value-of 需要填写select属性") ser = src } else { ser = src.Find(mySelect) } var nodes = make([]*goquery.Selection, 0) if len(ser.Nodes) == 0 { return errors.New(mySelect + ":未搜索到数据") } if properties != nil { ser.EachWithBreak(func(i int, s *goquery.Selection) bool { if properties.Index != 0 { if properties.Index == i+1 { //index nodes = append(nodes, s) return false } return true } var lenn = len(s.Get(0).Attr) // var ma bool = false for b := 0; b < lenn; b++ { if _, ok := properties.Property[s.Get(0).Attr[b].Key]; ok { for _, v := range properties.Property[s.Get(0).Attr[b].Key] { if strings.Contains(v, ".*") { if ok, er := regexp.Match(v, []byte(s.Get(0).Attr[b].Val)); er != nil { return false } else { if ok { if Ifok(properties, s.Get(0), b) { nodes = append(nodes, s) } return true } } } if s.Get(0).Attr[b].Val == v { if Ifok(properties, s.Get(0), b) { nodes = append(nodes, s) return true } } } } } return true }) //abandon _, pro, err := templete.GetSelect(ABANDON) if err != nil { return err } if pro != nil { for k, _ := range pro.Property { nodes = nodesDelete(nodes, GetSelectsByValue(pro.Property[k], nodes, k)) } } } else { ser.Each(func(i int, s *goquery.Selection) { nodes = append(nodes, s) }) } //not 操作符 nodes = notInstructionCharacter(properties, nodes) var index int = 0 for _, matchLabel := range templete.Children { switch matchLabel.LabelName { case TOVALUE: //fmt.Println("修改值中") FillingValue(matchLabel, m, nodes, &index) case LAYER: //改变map path := matchLabel.Attr["path"] //path有值 var usem interface{} usem = GetValueFormMapByLayer(m, path) if usem != nil { //填网页 FillingValueByChildren(matchLabel, usem, nodes, &index) } default: fmt.Println("模板标签书写错误了吧") } } return nil }