Beispiel #1
0
// commonPrase extracts the main article HTML from the document held by ctx.
//
// Headings are searched in priority order: h1, then h2, then h3. For the
// first heading level that occurs under <body>, the last heading of that
// level is handed to findP, which walks up to the closest ancestor that
// contains a <p> element (stopping at <body>). When the page has none of
// these headings, the whole <body> is used.
//
// The inner HTML of the chosen element is passed through CleanHtml and
// returned.
func (b baiduNews) commonPrase(ctx *Context) (infoStr string) {
	body := ctx.GetDom().Find("body")

	// Default to the body itself. The previous fallback, body.Find("body"),
	// looked for a <body> nested inside <body> and therefore always produced
	// an empty selection.
	info := body
	for _, tag := range []string{"h1", "h2", "h3"} {
		headings := body.Find(tag)
		if headings.Length() == 0 {
			continue
		}
		// Only the last heading of the level decides the container; this
		// matches the earlier loop, which overwrote its result on every
		// iteration.
		info = b.findP(headings.Last())
		break
	}

	// The error is intentionally dropped: this function has no error return,
	// so a render failure simply yields whatever (possibly empty) string
	// Html produced.
	infoStr, _ = info.Html()

	// Clean the HTML.
	// NOTE(review): the meaning of the second argument (5) is defined by
	// CleanHtml, which is not visible here — confirm before changing it.
	return CleanHtml(infoStr, 5)
}
Beispiel #2
0
// serializeForm converts the named controls found under sel into url.Values.
//
// It returns two sets of values: the first holds the form field values, the
// second holds the values of submit buttons (controls whose type attribute is
// "submit").
//
// Only input, button and textarea elements carrying a name attribute are
// considered. Apart from textareas, a control is included only when it has an
// explicit type attribute. A control's value is taken from its value
// attribute; a textarea without a value attribute contributes its text
// content instead, because that is where a textarea's value is stored in the
// markup. Both results are always non-nil.
func serializeForm(sel *goquery.Selection) (url.Values, url.Values) {
	fields := make(url.Values)
	buttons := make(url.Values)

	sel.Find("input,button,textarea").Each(func(_ int, s *goquery.Selection) {
		name, named := s.Attr("name")
		if !named {
			return
		}

		isTextarea := s.Is("textarea")
		typ, typed := s.Attr("type")
		if !typed && !isTextarea {
			return
		}

		if typ == "submit" {
			// Attr yields "" when the attribute is absent, which is exactly
			// the value recorded for a button without a value.
			val, _ := s.Attr("value")
			buttons.Add(name, val)
			return
		}

		val, hasValue := s.Attr("value")
		if !hasValue && isTextarea {
			// Textareas keep their content as child text, not in an
			// attribute; without this they would always serialize as "".
			val = s.Text()
		}
		fields.Add(name, val)
	})

	return fields, buttons
}
Beispiel #3
0
// formAttributes reports the submission method and target URL of form.
//
// The method comes from the form's method attribute, defaulting to "GET", and
// is returned upper-cased. The target comes from the action attribute,
// defaulting to u when the attribute is absent, and is then passed through
// MakeUrl together with schemeAndHost. If MakeUrl reports failure, both
// results are empty strings.
func formAttributes(u string, form *goquery.Selection, schemeAndHost ...string) (string, string) {
	method, hasMethod := form.Attr("method")
	if !hasMethod {
		method = "GET"
	}

	// Start from the fallback URL and override it only when the form
	// declares its own action.
	target := u
	if declared, hasAction := form.Attr("action"); hasAction {
		target = declared
	}

	resolved, valid := MakeUrl(target, schemeAndHost...)
	if !valid {
		return "", ""
	}
	return strings.ToUpper(method), resolved
}
Beispiel #4
0
// findP walks up from html and returns the closest ancestor that contains at
// least one <p> element.
//
// If html itself is <body>, or the walk reaches <body> without finding such
// an ancestor, <body> is returned. If the walk runs out of parents before
// reaching <body> (html is empty or is not located under <body>), the empty
// selection is returned instead of recursing without end.
func (b baiduNews) findP(html *goquery.Selection) *goquery.Selection {
	cur := html
	for {
		// An empty selection has no parent to climb to; without this guard
		// the walk would never terminate.
		if cur.Length() == 0 || cur.Is("body") {
			return cur
		}

		parent := cur.Parent()
		if parent.Find("p").Length() != 0 {
			return parent
		}
		cur = parent
	}
}