Esempio n. 1
0
// parseColors returns the text content of every element in s, concatenated in
// selection order with no separator.
//
// Selection.Text already yields the combined text of all matched elements
// (descendants included), so it is called directly instead of growing a string
// with += inside an Each callback, which re-copies the accumulated text on
// every iteration.
func parseColors(s *goquery.Selection) string {
	return s.Text()
}
// parseTranslations builds one Translation per element of the selection, in
// selection order. Each Translation is filled positionally from parseMeaning,
// parseHref and parsePhrase applied to that element. The result is nil when
// the selection is empty.
func parseTranslations(elements *goquery.Selection) (results []Translation) {
	elements.Each(func(_ int, item *goquery.Selection) {
		meaning := parseMeaning(item)
		href := parseHref(item)
		phrase := parsePhrase(item)
		results = append(results, Translation{meaning, href, phrase})
	})
	return results
}
Esempio n. 3
0
// dropTag turns every node in the selection into a text node in place: the
// node's Data becomes the text the element contained and its Type is switched
// to html.TextNode.
func (p *parser) dropTag(selection *goquery.Selection) {
	selection.Each(func(_ int, item *goquery.Selection) {
		// Capture the text before touching the node.
		text := item.Text()

		target := item.Get(0)
		target.Data = text
		target.Type = html.TextNode
	})
}
Esempio n. 4
0
File: scrape.go Progetto: squat/drae
// JoinNodesWithSpace returns the text of each element in s, in selection
// order, joined by a single space. An empty selection yields "".
func JoinNodesWithSpace(s *goquery.Selection) string {
	parts := make([]string, 0, len(s.Nodes))
	s.Each(func(_ int, item *goquery.Selection) {
		parts = append(parts, item.Text())
	})
	joined := strings.Join(parts, " ")
	return joined
}
Esempio n. 5
0
// parseRegions returns the text of each element in regionTags, in selection
// order. The returned slice is never nil; it is empty for an empty selection.
func parseRegions(regionTags *goquery.Selection) []string {
	regions := make([]string, regionTags.Length())
	regionTags.Each(func(idx int, tag *goquery.Selection) {
		// Each visits indices 0..Length()-1, so every slot is filled once.
		regions[idx] = tag.Text()
	})
	return regions
}
Esempio n. 6
0
// removeNodes detaches every node in s from its parent. Nodes for which
// Parent() yields an empty selection are left untouched.
func removeNodes(s *goquery.Selection) {
	s.Each(func(_ int, item *goquery.Selection) {
		owner := item.Parent()
		if owner.Length() == 0 {
			// TODO: the original author left this case undecided; nothing is done.
			return
		}
		child := item.Get(0)
		owner.Get(0).RemoveChild(child)
	})
}
Esempio n. 7
0
// filtraLigasP returns, in selection order, the elements of seleccion whose
// href attribute matches "/docencia/horarios/indiceplan/" anywhere in its
// value. Elements without an href attribute are skipped. The result is nil
// when nothing matches.
func filtraLigasP(seleccion *goquery.Selection) (planesGo []*goquery.Selection) {
	// Compile once up front: regexp.MatchString recompiled the same constant
	// pattern for every element and its error result was discarded.
	patron := regexp.MustCompile("/docencia/horarios/indiceplan/")

	seleccion.Each(func(_ int, anchor *goquery.Selection) {
		href, existe := anchor.Attr("href")
		if existe && patron.MatchString(href) {
			planesGo = append(planesGo, anchor)
		}
	})
	return planesGo
}
Esempio n. 8
0
// hitCaches looks up every element of imgs with e.hitCache and returns a copy
// of each non-nil hit, with the copy's Sel field set to the element that
// produced it. The result is nil when there are no hits.
func (e *extractImages) hitCaches(imgs *goquery.Selection, attr string) []*Image {
	var hits []*Image

	imgs.Each(func(_ int, img *goquery.Selection) {
		cached := e.hitCache(img, attr)
		if cached == nil {
			return
		}
		// Work on a copy so the value returned by hitCache is not modified.
		found := *cached
		found.Sel = img
		hits = append(hits, &found)
	})

	return hits
}
Esempio n. 9
0
// Links runs every extractor in f.DataExtractors against each element of sel
// and returns all extracted values in visit order. When f.Selector is
// non-empty, sel is first narrowed with Find(f.Selector).
func (f *follower) Links(sel *goquery.Selection) []string {
	scope := sel
	if f.Selector != "" {
		scope = sel.Find(f.Selector)
	}

	var links []string
	scope.Each(func(_ int, node *goquery.Selection) {
		for _, extractor := range f.DataExtractors {
			link := extractor.Extract(node)
			links = append(links, link)
		}
	})
	return links
}
Esempio n. 10
0
// Extract returns one Item per element of sel, each holding the output of
// every extractor in e.DataExtractors under that extractor's name. When
// e.Selector is non-empty, sel is first narrowed with Find(e.Selector).
func (e *ItemExtractor) Extract(sel *goquery.Selection) Items {
	scope := sel
	if e.Selector != "" {
		scope = sel.Find(e.Selector)
	}

	var items Items
	scope.Each(func(_ int, node *goquery.Selection) {
		entry := make(Item)
		for field, extractor := range e.DataExtractors {
			entry[field] = extractor.Extract(node)
		}
		items = append(items, entry)
	})
	return items
}
Esempio n. 11
0
// GetVideos searches article.Doc for every selector in videoTags, builds a
// video from each matching video, embed, object or iframe element, and adds
// those with a non-empty src to ve.movies, which it returns.
func (ve *VideoExtractor) GetVideos(article *Article) *set.Set {
	doc := article.Doc
	var nodes *goquery.Selection
	for _, videoTag := range videoTags {
		found := doc.Find(videoTag)
		if nodes == nil {
			nodes = found
			continue
		}
		// Union returns a new selection and leaves its receiver unchanged, so
		// the result must be kept. Discarding it meant only the matches for
		// the first tag were ever processed.
		nodes = nodes.Union(found)
	}
	if nodes == nil {
		// videoTags was empty: there is nothing to scan, and calling Each on
		// a nil selection would panic.
		return ve.movies
	}

	nodes.Each(func(_ int, node *goquery.Selection) {
		var movie video
		switch node.Get(0).DataAtom.String() {
		case "video":
			movie = ve.getVideoTag(node)
		case "embed":
			movie = ve.getEmbedTag(node)
		case "object":
			movie = ve.getObjectTag(node)
		case "iframe":
			movie = ve.getIFrame(node)
		}

		// Unknown tags leave movie at its zero value and are skipped here.
		if movie.src != "" {
			ve.movies.Add(movie)
		}
	})

	return ve.movies
}
Esempio n. 12
0
// pullMimes writes one " AddExtensionType(...)" line to w for each element of
// sel (presumably table rows — confirm against the caller).
//
// For every element it collects the descendant td cells and then:
//   - skips the element when the first cell's style attribute contains "cursor:";
//   - skips the element when it has fewer than three cells;
//   - skips the element when the text taken from the first cell has fewer
//     than two lines;
//   - otherwise emits the first cell's HTML, the trimmed second text line and,
//     when present, the quoted href of every anchor in the third cell whose
//     href starts with "http".
func pullMimes(w io.Writer, sel *goquery.Selection) {
	sel.Each(func(_ int, s *goquery.Selection) {
		tds := s.Find("td")

		style, ok := tds.Attr("style")
		if ok && strings.Contains(style, "cursor:") {
			return
		}

		// Cells 0 and 2 are indexed below; Get panics on an out-of-range
		// index, so elements with too few cells are skipped instead.
		if tds.Length() < 3 {
			return
		}

		// NOTE(review): this relies on WrapNode yielding a selection for the
		// given cell — confirm against the goquery version in use.
		extNode := tds.WrapNode(tds.Get(0))
		// The Html error is deliberately ignored, as in the original; on
		// failure the first argument is emitted as an empty string.
		html, _ := extNode.Html()

		ext := strings.Split(extNode.Text(), "\n")
		if len(ext) < 2 {
			return
		}

		var refs []string

		tds.WrapNode(tds.Get(2)).Find("a").Each(func(_ int, anchor *goquery.Selection) {
			href, found := anchor.Attr("href")
			if !found {
				return
			}

			if !strings.HasPrefix(href, "http") {
				return
			}

			refs = append(refs, fmt.Sprintf("%q", href))
		})

		if len(refs) > 0 {
			fmt.Fprintf(w, " AddExtensionType(%q, %q, %s)\n", html, strings.TrimSpace(ext[1]), strings.Join(refs, ","))
			return
		}

		fmt.Fprintf(w, " AddExtensionType(%q, %q)\n", html, strings.TrimSpace(ext[1]))
	})
}
Esempio n. 13
0
// Tianhuan applies the template node templete to the selection src and stores
// the results in m.
//
// It reads the template's SELECT attribute; when a selector is given, src is
// narrowed with Find, otherwise a notice is printed and src is used as is. An
// error is returned when GetSelect fails or when the resulting selection is
// empty. The selected elements are then filtered (by 1-based index or by
// attribute values from the select properties, minus anything matched by the
// ABANDON properties) and handed to each child of the template according to
// its LabelName: STORAGE, VALUEOF (recursive call) or LAYER.
//
// Errors from nested VALUEOF templates and from ARRAYEND parsing are printed
// or logged, not returned.
func Tianhuan(templete *Node, src *goquery.Selection, m map[string]interface{}) error {

	mySelect, properties, err := templete.GetSelect(SELECT)
	if err != nil {
		return err
	}
	var ser *goquery.Selection
	if mySelect == "" {
		fmt.Println("value-of 需要填写select属性")
		ser = src
	} else {
		ser = src.Find(mySelect)
	}

	var nodes = make([]*goquery.Selection, 0)
	if len(ser.Nodes) == 0 {
		return errors.New(mySelect + ":未搜索到数据")
	}
	if properties != nil {

		// EachWithBreak stops iterating as soon as the callback returns false.
		ser.EachWithBreak(func(i int, s *goquery.Selection) bool {
			if properties.Index != 0 {
				// Index is 1-based: keep only that element, then stop.
				if properties.Index == i+1 {
					nodes = append(nodes, s)
					return false
				}
				return true
			}
			var lenn = len(s.Get(0).Attr)
			for b := 0; b < lenn; b++ {
				if _, ok := properties.Property[s.Get(0).Attr[b].Key]; ok {
					for _, v := range properties.Property[s.Get(0).Attr[b].Key] {
						// Values containing ".*" are treated as regular expressions.
						if strings.Contains(v, ".*") {
							if ok, er := regexp.Match(v, []byte(s.Get(0).Attr[b].Val)); er != nil {
								// An invalid pattern aborts the whole iteration.
								return false
							} else {
								if ok {
									if Ifok(properties, s.Get(0), b) {
										nodes = append(nodes, s)
									}
									// Move on to the next element whether or not it was kept.
									return true
								}
							}
						}

						if s.Get(0).Attr[b].Val == v {
							if Ifok(properties, s.Get(0), b) {
								nodes = append(nodes, s)
								return true
							}
						}
					}
				}

			}
			return true
		})

		// Drop the nodes selected by the ABANDON properties.
		_, pro, err := templete.GetSelect(ABANDON)
		if err != nil {
			return err
		}
		if pro != nil {
			for k := range pro.Property {
				nodes = nodesDelete(nodes, GetSelectsByValue(pro.Property[k], nodes, k))
			}

		}

	} else {
		ser.Each(func(i int, s *goquery.Selection) {
			nodes = append(nodes, s)
		})
	}

	// Apply the "not" operator.
	nodes = notInstructionCharacter(properties, nodes)

	// Debug dump of the nodes that survived filtering.
	fmt.Println("***************************")
	for _, vvv := range nodes {
		fmt.Println(vvv.Get(0).Attr, vvv.Get(0).Data)
	}
	fmt.Println("------------------------------")
	var index int = 0
	for _, matchLabel := range templete.Children {
		switch matchLabel.LabelName {
		case STORAGE: // store a value
			// NOTE(review): this break only leaves the switch, so the loop
			// continues with the next child whatever MatchMap returns. Confirm
			// whether stopping the loop was intended.
			if 0 != MatchMap(nodes, matchLabel, m, &index, nil) {
				break
			}
		case VALUEOF: // look up values with a nested template
			la := make([]*html.Node, 0, len(nodes))
			for _, ll := range nodes {
				la = append(la, ll.Get(0))
			}
			// Build a real selection to carry the nodes: the original wrote
			// to the Nodes field of a nil *goquery.Selection, which panics.
			bb := &goquery.Selection{Nodes: la}
			if err := Tianhuan(matchLabel, bb, m); err != nil {
				fmt.Println(err)
			}
		case LAYER: // nest the values one level deeper
			if matchLabel.Attr[NAME] != "" {

				// Start with a nested map; it is replaced by a slice of maps
				// below when the layer is an array with an ARRAYEND attribute.
				m[matchLabel.Attr[NAME]] = make(map[string]interface{})
				if matchLabel.Attr[ARRAY] == "true" || matchLabel.Attr[ARRAYEND] != "" {
					// The ARRAYEND properties decide where the array ends.
					var property *Properties
					if matchLabel.Attr[ARRAYEND] != "" {
						_, property, err = matchLabel.GetSelect(ARRAYEND)
						if err != nil {
							inputLog.Println(err)
							break
						}
						arr := make([]map[string]interface{}, 0, 0)
						ToValueArray(nodes, matchLabel, &index, property, &arr)
						m[matchLabel.Attr[NAME]] = arr
					}

				} else {
					for i := 0; i < len(matchLabel.Children); i++ {
						if 0 != MatchMap(nodes, matchLabel.Children[i], m[matchLabel.Attr[NAME]].(map[string]interface{}), &index, nil) {
							break
						}

					}
				}
			} else {
				fmt.Println("未书写层级名称, 跳过此标签 和其他子标签")
			}

		default:
			fmt.Println("模板标签书写错误了吧")
		}
	}

	return nil
}
Esempio n. 14
0
// OutPutTianhuan selects elements from src as directed by the template node
// templete and passes them, together with m, to the fill helpers chosen by
// each template child's LabelName (TOVALUE or LAYER).
//
// It reads the template's SELECT attribute; when a selector is given, src is
// narrowed with Find, otherwise a notice is printed and src is used as is. An
// error is returned when GetSelect fails or when the resulting selection is
// empty. Presumably the helpers write values from m back into the matched
// nodes — confirm against FillingValue / FillingValueByChildren.
func OutPutTianhuan(templete *Node, src *goquery.Selection, m map[string]interface{}) error {

	mySelect, properties, err := templete.GetSelect(SELECT)
	if err != nil {
		return err
	}
	var ser *goquery.Selection
	if mySelect == "" {
		fmt.Println("value-of 需要填写select属性")
		ser = src
	} else {
		ser = src.Find(mySelect)
	}

	var nodes = make([]*goquery.Selection, 0)
	if len(ser.Nodes) == 0 {

		return errors.New(mySelect + ":未搜索到数据")
	}
	if properties != nil {

		// EachWithBreak stops iterating as soon as the callback returns false.
		ser.EachWithBreak(func(i int, s *goquery.Selection) bool {
			if properties.Index != 0 {
				// Index is 1-based: keep only that element, then stop.
				if properties.Index == i+1 { //index
					nodes = append(nodes, s)
					return false
				}
				return true
			}
			var lenn = len(s.Get(0).Attr)
			//			var ma bool = false
			for b := 0; b < lenn; b++ {
				if _, ok := properties.Property[s.Get(0).Attr[b].Key]; ok {

					for _, v := range properties.Property[s.Get(0).Attr[b].Key] {

						// Values containing ".*" are treated as regular expressions.
						if strings.Contains(v, ".*") {
							if ok, er := regexp.Match(v, []byte(s.Get(0).Attr[b].Val)); er != nil {
								// An invalid pattern aborts the whole iteration.
								return false
							} else {
								if ok {
									if Ifok(properties, s.Get(0), b) {
										nodes = append(nodes, s)
									}
									// Move on to the next element whether or not it was kept.
									return true
								}
							}
						}

						if s.Get(0).Attr[b].Val == v {
							if Ifok(properties, s.Get(0), b) {
								nodes = append(nodes, s)
								return true
							}
						}
					}
				}

			}
			return true
		})

		// Drop the nodes selected by the ABANDON properties.
		_, pro, err := templete.GetSelect(ABANDON)
		if err != nil {
			return err
		}
		if pro != nil {
			for k, _ := range pro.Property {
				nodes = nodesDelete(nodes, GetSelectsByValue(pro.Property[k], nodes, k))
			}

		}

	} else {
		// No select properties: keep every matched element.
		ser.Each(func(i int, s *goquery.Selection) {
			nodes = append(nodes, s)
		})
	}

	// Apply the "not" operator.
	nodes = notInstructionCharacter(properties, nodes)

	// index is shared by pointer with the fill helpers across all children.
	var index int = 0
	for _, matchLabel := range templete.Children {
		switch matchLabel.LabelName {
		case TOVALUE:
			//fmt.Println("修改值中")
			FillingValue(matchLabel, m, nodes, &index)
		case LAYER: // switch to a nested part of the map
			path := matchLabel.Attr["path"] // path is expected to be set
			var usem interface{}
			usem = GetValueFormMapByLayer(m, path)
			if usem != nil {
				// Fill the page from the value found at path.
				FillingValueByChildren(matchLabel, usem, nodes, &index)
			}

		default:
			fmt.Println("模板标签书写错误了吧")
		}
	}

	return nil
}