func DiscoverHtml(id string) (*string, os.Error) { resp, _, err := http.Get(id) if err != nil { return nil, err } defer resp.Body.Close() tokenizer := html.NewTokenizer(resp.Body) for { tt := tokenizer.Next() switch tt { case html.ErrorToken: log.Println("Error: ", tokenizer.Error()) return nil, tokenizer.Error() case html.StartTagToken, html.EndTagToken: tk := tokenizer.Token() if tk.Data == "link" { ok := false for _, attr := range tk.Attr { if attr.Key == "rel" && attr.Val == "openid2.provider" { log.Println(tk.String()) ok = true } else if attr.Key == "href" && ok { return &attr.Val, nil } } } } } return nil, &DiscoveryError{str: "provider not found"} }
func main() { tokenizer := html.NewTokenizer(page.Body) foundStart := false for { ty := tonkenizer.Next() if ty == html.ErrorToken { break } if ty != html.StartTagToken { continue } t := tokenizer.Token() if t.Data != "a" { continue } for _, attr := range t.Attr { if "href" == attr.Key { if !foundStart || ((len(attr.Val) > 4) && "http" == attr.Val[0:4]) { if ".." == attr.Val { foundStart = true } break } fmt.Printf("%s\n", attr.Val) } } } }
// getHTMLTitle parses r as HTML and returns its title. An empty string is // returned if no <title> tag is found or error happens. Please note that this // function doesn't check for the <html> -> <head> -> <title> tag hierarchy, // but just picks the first <title> tag and returns its text. func getHTMLTitle(r io.Reader) string { z := html.NewTokenizer(r) for { tt := z.Next() if tt == html.ErrorToken { // probably the end of buffer, not an actual error break } if tt == html.StartTagToken { tn, _ := z.TagName() if string(tn) == "title" { // found the <title> tag, now return the next // token, which is actually the text just // after <title> z.Next() return strings.Trim(z.Token().String(), " \t\n\r") } } } // return empty in case of error or when no <title> tag was found return "" }
func (v *visitor) VisitFile(path string, fi *os.FileInfo) { baseDir := HtmlDir if strings.HasSuffix(HtmlDir, "/") { baseDir = HtmlDir[0 : len(HtmlDir)-1] } baseDirName := baseDir[strings.LastIndex(baseDir, "/")+1:] packageAndFilename := path[len(baseDir)+1:] if !strings.Contains(packageAndFilename, "/") { return } packageName := packageAndFilename[0:strings.LastIndex(packageAndFilename, "/")] packagePath := baseDirName + "/" + packageName fmt.Print(packagePath, " ", fi.Name) if !strings.HasSuffix(fi.Name, ".html") { fmt.Println(" (skipping)") return } else { fmt.Println(" (processing)") } templateName := fi.Name[0:strings.Index(fi.Name, ".")] s := "" f, _ := os.Open(path) t := html.NewTokenizer(f) items := []Item{} root := Tag{ Contents: make([]Element, 0), } currentElement := &root for { tt := t.Next() if tt == html.ErrorToken { if t.Error().String() == "EOF" { break } } token := t.Token() switch token.Type { case html.TextToken: currentElement.Contents = append(currentElement.Contents, &Text{ Text: token.Data, parent: currentElement, }) s1 := strings.Replace(token.Data, `"`, `\"`, -1) s2 := strings.Replace(s1, ` `, `\n`, -1) s += s2 case html.StartTagToken, html.SelfClosingTagToken: tagPlainText := `` att := token.Attr tagId := `` tagPlainText += `<` + token.Data tagAttributes := make(map[string]string) tagStyles := make(map[string]string) for _, v := range att { val := v.Val if strings.ToLower(v.Key) == "id" { val = `"+id+"_` + v.Val newItem := Item{ Id: v.Val, ItemNameLower: toLower(v.Val), ItemNameUpper: toUpper(v.Val), } items = append(items, newItem) tagId = v.Val } else if strings.ToLower(v.Key) == `style` { tagStyles = makeMap(v.Val, `;`, `:`) } else { tagAttributes[v.Key] = v.Val } tagPlainText += ` ` + v.Key + `=\"` + val + `\"` } if token.Type == html.StartTagToken { tagPlainText += `>` } else { tagPlainText += ` />` } s += tagPlainText newTag := &Tag{ Id: tagId, Name: token.Data, Attributes: tagAttributes, Styles: tagStyles, Contents: make([]Element, 0), parent: currentElement, } currentElement.Contents = append(currentElement.Contents, newTag) if token.Type == html.StartTagToken { currentElement = newTag } case html.EndTagToken: tagPlainText := fmt.Sprint(`</`, token.Data, `>`) s += tagPlainText currentElement = currentElement.Parent() } } sOut := `<script>function template_` + templateName + `(id){return "` + s + `"}</script>` defs := `` names := `` sequence := 0 namesMap := make(map[string]string) for _, v := range root.Contents { newSequence, name, def := v.Definition(sequence, namesMap) defs += def names += name + `, ` sequence = newSequence } for i := 0; i < len(items); i++ { items[i].Variable = namesMap[items[i].Id] } temp := Template{ PackageName: packageName, PackagePath: packagePath, PackagePathUnderscores: strings.Replace(packagePath, "/", "_", -1), NameUpper: toUpper(templateName), NameLower: toLower(templateName), Html: sOut, Items: items, Defs: defs, Names: names, } v.AppendTemplate(packagePath, temp) }
func main() { flag.Parse() if *url == "" { flag.Usage() os.Exit(1) } r, finalurl, err := http.Get(*url) if err != nil { fmt.Println("Unable to get url") os.Exit(1) } fmt.Println("sucessfully opened", finalurl) var board string var threadId string finalurl = strings.Replace(finalurl, "/", " ", 5) fmt.Sscanf(finalurl, "http: boards.4chan.org %s res %s", &board, &threadId) fmt.Println("Using board: ", board) urlMap := map[string]string{} z := html.NewTokenizer(r.Body) for { if z.Next() == html.ErrorToken { if z.Error() == os.EOF { fmt.Println("Parsed HTML data...") break } else { fmt.Println("exiting due to error: ", z.Error()) os.Exit(1) } } myToken := z.Token() if myToken.Type == html.StartTagToken && myToken.Data == "a" { for _, v := range myToken.Attr { if v.Key == "href" { // Now we need to search for http://images.4chan.org/b/src/ searchString := fmt.Sprintf("http://images.4chan.org/%s/src/", board) if strings.Contains(v.Val, searchString) { urlMap[v.Val] = v.Val } } } } } for _, v := range urlMap { fmt.Println("Getting: ", v) imgResp, _, err := http.Get(v) if err != nil { fmt.Println("Status : ", imgResp.Status, "Status Code: ", imgResp.StatusCode) fmt.Println(err) os.Exit(1) } imageData := make([]byte, imgResp.ContentLength) if n, err := io.ReadFull(imgResp.Body, imageData); err != nil { fmt.Println(err) fmt.Println("Mangaged to read ", n, "bytes") } fileName := path.Base(v) wd, err := os.Getwd() if err != nil { fmt.Println(err) os.Exit(1) } filePath := fmt.Sprintf("%s/%s/", wd, board) err = os.MkdirAll(filePath, 0777) if err != nil { fmt.Println(err) os.Exit(1) } fullPath := fmt.Sprintf("%s%s", filePath, fileName) file, err := os.Create(fullPath) if err != nil { fmt.Println(err) os.Exit(1) } defer file.Close() _, err = file.Write(imageData) if err != nil { fmt.Println(err) os.Exit(1) } fmt.Println("Stored: ", fullPath) fmt.Println() } }