Пример #1
0
// DiscoverHtml fetches the page at id and scans its HTML for a
// <link rel="openid2.provider" href="..."> element.
//
// It returns a pointer to the href value of the first such element. If the
// request fails, or the tokenizer stops for any reason other than end of
// input, that error is returned. If the whole document is read without
// finding a provider link, a *DiscoveryError is returned.
func DiscoverHtml(id string) (*string, os.Error) {
	resp, _, err := http.Get(id)
	if err != nil {
		return nil, err
	}
	defer resp.Body.Close()

	tokenizer := html.NewTokenizer(resp.Body)
	for {
		tt := tokenizer.Next()
		if tt == html.ErrorToken {
			if err := tokenizer.Error(); err != os.EOF {
				return nil, err
			}
			// End of document without a match: report "provider not found"
			// below instead of leaking os.EOF to the caller.
			break
		}
		// A <link> element may be written as a start tag or as a
		// self-closing tag; end tags carry no attributes and are skipped.
		if tt != html.StartTagToken && tt != html.SelfClosingTagToken {
			continue
		}
		tk := tokenizer.Token()
		if tk.Data != "link" {
			continue
		}

		// Inspect every attribute before deciding, so the result does not
		// depend on whether rel or href comes first in the markup.
		isProvider := false
		hasHref := false
		href := ""
		for _, attr := range tk.Attr {
			switch attr.Key {
			case "rel":
				if attr.Val == "openid2.provider" {
					isProvider = true
				}
			case "href":
				if !hasHref {
					href = attr.Val
					hasHref = true
				}
			}
		}
		if isProvider && hasHref {
			log.Println(tk.String())
			return &href, nil
		}
	}
	return nil, &DiscoveryError{str: "provider not found"}
}
Пример #2
0
// main tokenizes page.Body and prints the href of selected <a> start tags.
//
// Every href is skipped until one equal to ".." has been seen (that link is
// skipped as well). After that, hrefs longer than four bytes that begin with
// "http" are skipped and all remaining hrefs are printed, one per line.
// Scanning stops at the first tokenizer error, which includes end of input.
func main() {
	tokenizer := html.NewTokenizer(page.Body)
	foundStart := false
	for {
		ty := tokenizer.Next()
		if ty == html.ErrorToken {
			break
		}
		if ty != html.StartTagToken {
			continue
		}
		t := tokenizer.Token()
		if t.Data != "a" {
			continue
		}
		for _, attr := range t.Attr {
			if attr.Key != "href" {
				continue
			}
			isHTTP := len(attr.Val) > 4 && attr.Val[0:4] == "http"
			if !foundStart || isHTTP {
				if attr.Val == ".." {
					foundStart = true
				}
				// Skip the remaining attributes of this anchor.
				break
			}
			fmt.Printf("%s\n", attr.Val)
		}
	}
}
Пример #3
0
// getHTMLTitle parses r as HTML and returns the text of its first <title>
// element with surrounding spaces, tabs and line breaks trimmed. An empty
// string is returned if no <title> tag is found, if the title has no text, or
// if tokenizing stops first. Please note that this function doesn't check for
// the <html> -> <head> -> <title> tag hierarchy; it simply uses the first
// <title> start tag it meets.
func getHTMLTitle(r io.Reader) string {
	z := html.NewTokenizer(r)
	for {
		tt := z.Next()
		if tt == html.ErrorToken {
			// probably the end of buffer, not an actual error
			break
		}
		if tt != html.StartTagToken {
			continue
		}
		tn, _ := z.TagName()
		if string(tn) != "title" {
			continue
		}

		// The title text, if any, is the token right after <title>. When the
		// element is empty the next token is the end tag (or something else
		// entirely), which must not be reported as the title.
		if z.Next() != html.TextToken {
			return ""
		}
		// Use the token's Data rather than its String() form so the caller
		// gets the plain text instead of re-serialized markup.
		return strings.Trim(z.Token().Data, " \t\n\r")
	}

	// return empty in case of error or when no <title> tag was found
	return ""
}
Пример #4
0
// VisitFile turns one .html file below HtmlDir into a Template and registers
// it with v.AppendTemplate under its package path.
//
// path is the file's path and fi its file info. Files that sit directly in
// HtmlDir (no package sub-directory) and files without a ".html" suffix are
// ignored. If the file cannot be opened, or tokenizing stops with an error
// other than end of input, the error is printed and no template is added.
func (v *visitor) VisitFile(path string, fi *os.FileInfo) {

	// Strip a trailing slash so the slicing below sees a uniform base path.
	baseDir := HtmlDir
	if strings.HasSuffix(HtmlDir, "/") {
		baseDir = HtmlDir[0 : len(HtmlDir)-1]
	}
	baseDirName := baseDir[strings.LastIndex(baseDir, "/")+1:]
	// NOTE(review): assumes path starts with baseDir + "/" — confirm with the caller.
	packageAndFilename := path[len(baseDir)+1:]
	if !strings.Contains(packageAndFilename, "/") {
		return
	}
	packageName := packageAndFilename[0:strings.LastIndex(packageAndFilename, "/")]

	packagePath := baseDirName + "/" + packageName

	fmt.Print(packagePath, " ", fi.Name)
	if !strings.HasSuffix(fi.Name, ".html") {
		fmt.Println(" (skipping)")
		return
	}
	fmt.Println(" (processing)")

	templateName := fi.Name[0:strings.Index(fi.Name, ".")]
	s := ""
	f, err := os.Open(path)
	if err != nil {
		fmt.Println(err)
		return
	}
	defer f.Close()

	t := html.NewTokenizer(f)
	items := []Item{}
	root := Tag{
		Contents: make([]Element, 0),
	}
	currentElement := &root

	for {
		tt := t.Next()

		if tt == html.ErrorToken {
			// Every error ends the scan; without this a non-EOF error would
			// be reported by Next again and again.
			if err := t.Error(); err != os.EOF {
				fmt.Println(err)
				return
			}
			break
		}
		token := t.Token()
		switch token.Type {
		case html.TextToken:
			currentElement.Contents = append(currentElement.Contents, &Text{
				Text:   token.Data,
				parent: currentElement,
			})
			// Escape quotes and line breaks: s ends up inside a
			// double-quoted string literal in the generated script.
			s1 := strings.Replace(token.Data, `"`, `\"`, -1)
			s2 := strings.Replace(s1, "\n", `\n`, -1)
			s += s2
		case html.StartTagToken, html.SelfClosingTagToken:
			tagPlainText := `<` + token.Data
			tagId := ``
			tagAttributes := make(map[string]string)
			tagStyles := make(map[string]string)

			for _, attr := range token.Attr {
				val := attr.Val
				switch strings.ToLower(attr.Key) {
				case "id":
					// In the emitted text the id gets prefixed with the
					// script function's id argument.
					val = `"+id+"_` + attr.Val
					items = append(items, Item{
						Id:            attr.Val,
						ItemNameLower: toLower(attr.Val),
						ItemNameUpper: toUpper(attr.Val),
					})
					tagId = attr.Val
				case `style`:
					tagStyles = makeMap(attr.Val, `;`, `:`)
				default:
					tagAttributes[attr.Key] = attr.Val
				}
				tagPlainText += ` ` + attr.Key + `=\"` + val + `\"`
			}

			if token.Type == html.StartTagToken {
				tagPlainText += `>`
			} else {
				tagPlainText += ` />`
			}
			s += tagPlainText

			newTag := &Tag{
				Id:         tagId,
				Name:       token.Data,
				Attributes: tagAttributes,
				Styles:     tagStyles,
				Contents:   make([]Element, 0),
				parent:     currentElement,
			}
			currentElement.Contents = append(currentElement.Contents, newTag)
			// Only a real start tag opens a new nesting level.
			if token.Type == html.StartTagToken {
				currentElement = newTag
			}

		case html.EndTagToken:
			s += fmt.Sprint(`</`, token.Data, `>`)
			// Stay on the current element when it has no parent, so an
			// unmatched end tag cannot leave currentElement nil.
			if parent := currentElement.Parent(); parent != nil {
				currentElement = parent
			}
		}
	}
	sOut := `<script>function template_` + templateName + `(id){return "` + s + `"}</script>`

	defs := ``
	names := ``
	sequence := 0
	namesMap := make(map[string]string)
	for _, el := range root.Contents {
		newSequence, name, def := el.Definition(sequence, namesMap)
		defs += def
		names += name + `, `
		sequence = newSequence
	}

	// Definition filled namesMap; look up the name recorded for each item id.
	for i := 0; i < len(items); i++ {
		items[i].Variable = namesMap[items[i].Id]
	}

	temp := Template{
		PackageName:            packageName,
		PackagePath:            packagePath,
		PackagePathUnderscores: strings.Replace(packagePath, "/", "_", -1),
		NameUpper:              toUpper(templateName),
		NameLower:              toLower(templateName),
		Html:                   sOut,
		Items:                  items,
		Defs:                   defs,
		Names:                  names,
	}
	v.AppendTemplate(packagePath, temp)
}
Пример #5
0
// main downloads the images linked from the thread page given by the url
// flag.
//
// It fetches the page, derives the board name from the final URL, collects
// every <a href> that contains "http://images.4chan.org/<board>/src/", and
// stores each image in <working directory>/<board>/. Setup failures print a
// message and exit with status 1.
func main() {
	flag.Parse()
	if *url == "" {
		flag.Usage()
		os.Exit(1)
	}
	r, finalurl, err := http.Get(*url)
	if err != nil {
		fmt.Println("Unable to get url")
		os.Exit(1)
	}

	fmt.Println("sucessfully opened", finalurl)

	var board string
	var threadId string
	// Turn the first five slashes into spaces so Sscanf can pick the board
	// and thread id out as whitespace-separated fields.
	finalurl = strings.Replace(finalurl, "/", " ", 5)
	fmt.Sscanf(finalurl, "http:  boards.4chan.org %s res %s", &board, &threadId)
	fmt.Println("Using board: ", board)

	// Now we need to search for http://images.4chan.org/b/src/
	searchString := fmt.Sprintf("http://images.4chan.org/%s/src/", board)

	// Keyed by URL so an image linked several times is fetched only once.
	urlMap := map[string]string{}

	z := html.NewTokenizer(r.Body)

	for {
		if z.Next() == html.ErrorToken {
			if z.Error() == os.EOF {
				fmt.Println("Parsed HTML data...")
				break
			}
			fmt.Println("exiting due to error: ", z.Error())
			os.Exit(1)
		}

		myToken := z.Token()
		if myToken.Type != html.StartTagToken || myToken.Data != "a" {
			continue
		}
		for _, v := range myToken.Attr {
			if v.Key == "href" && strings.Contains(v.Val, searchString) {
				urlMap[v.Val] = v.Val
			}
		}
	}
	// The page has been fully tokenized; release it before downloading.
	r.Body.Close()

	for _, v := range urlMap {
		downloadImage(v, board)
	}
}

// downloadImage fetches imageURL and writes it to
// <working directory>/<board>/<last path element of imageURL>.
//
// Request, directory and file-creation failures print the error and exit
// with status 1. A failure while copying the image data is printed together
// with the number of bytes copied, and the function returns without
// reporting the file as stored. The response body and the file are closed
// when the function returns, so handles do not pile up across images.
func downloadImage(imageURL, board string) {
	fmt.Println("Getting: ", imageURL)
	imgResp, _, err := http.Get(imageURL)
	if err != nil {
		// The response must not be touched when Get fails.
		fmt.Println(err)
		os.Exit(1)
	}
	defer imgResp.Body.Close()

	wd, err := os.Getwd()
	if err != nil {
		fmt.Println(err)
		os.Exit(1)
	}

	filePath := fmt.Sprintf("%s/%s/", wd, board)
	err = os.MkdirAll(filePath, 0777)
	if err != nil {
		fmt.Println(err)
		os.Exit(1)
	}

	fullPath := fmt.Sprintf("%s%s", filePath, path.Base(imageURL))
	file, err := os.Create(fullPath)
	if err != nil {
		fmt.Println(err)
		os.Exit(1)
	}
	defer file.Close()

	// Stream the body instead of allocating ContentLength bytes up front;
	// ContentLength can be negative when the length is unknown.
	n, err := io.Copy(file, imgResp.Body)
	if err != nil {
		fmt.Println(err)
		fmt.Println("Mangaged to read ", n, "bytes")
		return
	}

	fmt.Println("Stored: ", fullPath)
	fmt.Println()
}