Example #1
0
func flushTagToken(htmlBuf *[]byte, tz *html.Tokenizer, url string) string {
	*htmlBuf = append(*htmlBuf, '<')
	tagName, hasAttr := tz.TagName()
	*htmlBuf = append(*htmlBuf, tagName...)
	if hasAttr {
		for {
			attrKey, attrValue, hasMore := tz.TagAttr()
			*htmlBuf = append(*htmlBuf, ' ')
			*htmlBuf = append(*htmlBuf, attrKey...)
			*htmlBuf = append(*htmlBuf, '=', '"')
			if tagAttrToProxy[string(tagName)][string(attrKey)] {
				urlInAttr := string(attrValue)
				*htmlBuf = append(*htmlBuf, []byte(GetProxiedUrl(urlInAttr, url))...)
			} else {
				*htmlBuf = append(*htmlBuf, attrValue...)
			}
			*htmlBuf = append(*htmlBuf, '"')
			if !hasMore {
				break
			}
		}
	}
	*htmlBuf = append(*htmlBuf, '>')
	if string(tagName) == "head" {
		*htmlBuf = append(*htmlBuf, []byte(getJsHookTag())...)
	}
	return string(tagName)
}
func (item *AnimeConventionItem) readResgiterNowurl(t *html.Tokenizer) {
	t.Next()
	if _, hasmore := t.TagName(); hasmore {
		if key, val, _ := t.TagAttr(); strings.EqualFold(string(key), "href") {
			item.registerNowURL = string(val)
		}
	}
}
func (item *AnimeConventionItem) readCountry(t *html.Tokenizer, hasmore bool) {
	if hasmore {
		_, val, _ := t.TagAttr()
		item.country = string(val)
		return

	}
	t.Next()
	item.country = string(t.Text())
}
func readAttributes(z *html.Tokenizer, buff []html.Attribute) []html.Attribute {
	buff = buff[:0]
	for {
		key, value, more := z.TagAttr()
		if key != nil {
			buff = append(buff, html.Attribute{Key: string(key), Val: string(value)})
		}

		if !more {
			return buff
		}
	}
}
func (item *AnimeConventionItem) readNameAndLink(t *html.Tokenizer) {
	if label := t.Next(); label == html.StartTagToken {
		_, hasmore := t.TagName()
		if hasmore {
			if key, val, _ := t.TagAttr(); strings.EqualFold(string(key), "href") {
				item.siteURL = string(val)
			}
		}
	}
	if label := t.Next(); label == html.TextToken {
		item.name = string(t.Text())
	}
}
Example #6
0
// AttrMap parses the attributes of the current element into a friendly map.
// It only makes sense to call this while processing a start or self closing tag token.
func AttrMap(hasAttr bool, z *html.Tokenizer) map[string]string {
	attrs := make(map[string]string)
	if !hasAttr {
		return attrs
	}
	for {
		k, v, more := z.TagAttr()
		attrs[string(k)] = string(v)
		if !more {
			break
		}
	}
	return attrs
}
func (item *AnimeConventionItem) Parse(t *html.Tokenizer) {
	for {
		label := t.Next()
		switch label {
		case html.ErrorToken:
			fmt.Errorf("%v\n", t.Err())
			return
		case html.TextToken:
			switch string(t.Text()) {
			case "Advance Rates:":
				//fmt.Println("rate")
				item.readadvanceRate(t)
			case "At-Door Rates:":
				item.readatDoorRate(t)
			}
		case html.StartTagToken, html.EndTagToken, html.SelfClosingTagToken:
			tag, hasmore := t.TagName()
			if strings.EqualFold(string(tag), "big") {
				item.readResgiterNowurl(t)
			} else if hasmore {
				key, val, hasmore := t.TagAttr()
				if strings.EqualFold(string(key), "itemprop") {
					//fmt.Println(string(val))
					switch string(val) {
					case "description":
						item.readDescription(t)
					case "latitude":
						item.readLatitude(t)
					case "longitude":
						item.readLongitude(t)
					case "startDate":
						item.readStartDate(t)
					case "endDate":
						item.readEndDate(t)
					case "location":
						item.readLocation(t)
					case "addressLocality":
						item.readCity(t)
					case "addressRegion":
						item.readState(t)
					case "addressCountry":
						item.readCountry(t, hasmore)
					case "name":
						item.readNameAndLink(t)
					}
				}
			}
		}
	}
}
func (item *AnimeConventionItem) readLocation(t *html.Tokenizer) {
	for {
		if label := t.Next(); label == html.StartTagToken {
			_, hasmore := t.TagName()
			if hasmore {
				if _, val, _ := t.TagAttr(); strings.EqualFold(string(val), "name") {
					break
				}
			}
		}
	}
	if label := t.Next(); label == html.TextToken {
		item.location = string(t.Text())
	}
}
Example #9
0
func parseDivX86(z *html.Tokenizer, in *Intrinsic) *Intrinsic {
	more := true
	var k, v []byte
	for more {
		k, v, more = z.TagAttr()
		//		fmt.Println("attr:", string(k))
		switch string(k) {
		case "class":
			val := string(v)
			if strings.Contains(val, "intrinsic") {
				in.FinishX86()
				return NewIntrinsic()
			}
			switch val {
			case "cpuid":
				in.CpuID = getText(z)
			case "instruction":
				in.Instruction = strings.ToUpper(getText(z))
			case "rettype":
				in.RetType = fixTypeX86(getText(z))
			case "param_type":
				in.cParam = &Param{Type: fixTypeX86(getText(z))}
			case "param_name":
				in.cParam.Name = getText(z)
				if !in.Params.HasParam(in.cParam.Name) {
					in.Params = append(in.Params, *in.cParam)
				}
				in.cParam = nil
			case "description":
				in.Description = strings.TrimSpace(getTextR(z))
			case "name":
				in.OrgName = getText(z)
				in.Name = fixFuncNameX86(in.OrgName)
			case "operation":
				in.Operation = strings.TrimSpace(getText(z))
			default:
				//fmt.Println("unparsed class:", string(v))

			}
		}
	}
	return in
}
func (list *AnimeConventionList) Parse(t *html.Tokenizer) {
	for {
		next := t.Next()
		switch next {
		case html.ErrorToken:
			return
		case html.TextToken:
			//fmt.Println(string(t.Text()))
		case html.StartTagToken, html.EndTagToken, html.SelfClosingTagToken:
			_, hasmore := t.TagName()
			if hasmore {
				key, val, _ := t.TagAttr()
				if strings.EqualFold(string(key), "href") && strings.HasPrefix(string(val), "/events/info.shtml") {
					var item = &AnimeConventionItem{}
					item.url = strings.Join([]string{"http://animecons.com", string(val)}, "")
					list.taskNum++
					go item.crawlInformation(&list.ConventionList)
					//time.Sleep(100 * time.Millisecond)
				}
			}
		}
	}
}
Example #11
0
func buildTokenArray(tokenizer *xhtml.Tokenizer) []tagInfo {
	tokens := []tagInfo{}
	for tt := tokenizer.Next(); tt != xhtml.ErrorToken; tt = tokenizer.Next() {
		switch tt {
		case xhtml.TextToken:
			txt := string(tokenizer.Text())
			if len(tokens) == 0 {
				info := tagInfo{
					raw: txt,
				}
				tokens = append(tokens, info)
			}
			tn, _ := tokenizer.TagName()
			key, val, _ := tokenizer.TagAttr()
			info := tagInfo{
				tag: string(tn),
				key: string(key),
				val: string(val),
				txt: txt,
			}
			tokens = append(tokens, info)
		case xhtml.StartTagToken:
			tn, _ := tokenizer.TagName()
			key, val, _ := tokenizer.TagAttr()
			info := tagInfo{
				tag: string(tn),
				key: string(key),
				val: string(val),
			}
			tokens = append(tokens, info)
		case xhtml.SelfClosingTagToken, xhtml.EndTagToken:
			tn, _ := tokenizer.TagName()
			key, val, _ := tokenizer.TagAttr()
			info := tagInfo{
				tag:        string(tn),
				key:        string(key),
				val:        string(val),
				closingTag: true,
			}
			tokens = append(tokens, info)
		}
	}
	return tokens
}
func (item *AnimeConventionItem) readEndDate(t *html.Tokenizer) {
	_, val, _ := t.TagAttr()
	item.endDate = string(val)
}
func (item *AnimeConventionItem) readLongitude(t *html.Tokenizer) {
	_, val, _ := t.TagAttr()
	item.longitude = string(val)
}
Example #14
0
File: parser.go Project: influx6/gu
func pullNode(tokens *html.Tokenizer, root *Markup) {
	var node *Markup

	for {
		token := tokens.Next()

		switch token {
		case html.ErrorToken:
			return

		case html.TextToken, html.CommentToken, html.DoctypeToken:
			text := strings.TrimSpace(string(tokens.Text()))

			if text == "" {
				continue
			}

			if token == html.CommentToken {
				text = "<!--" + text + "-->"
			}

			if node != nil {
				NewText(text).Apply(node)
				continue
			}

			NewText(text).Apply(root)
			continue

		case html.StartTagToken, html.EndTagToken, html.SelfClosingTagToken:
			if token == html.EndTagToken {
				node = nil
				return
			}

			tagName, hasAttr := tokens.TagName()

			node = NewMarkup(string(tagName), token == html.SelfClosingTagToken)
			node.Apply(root)

			if hasAttr {
			attrLoop:
				for {
					key, val, more := tokens.TagAttr()

					NewAttr(string(key), string(val)).Apply(node)

					if !more {
						break attrLoop
					}
				}
			}

			if token == html.SelfClosingTagToken {
				continue
			}

			pullNode(tokens, node)
		}
	}
}