func main() {
	s := `<p>Links:<a href="a1" class="test"/></p><ul><li><a href="foo">Foo</a><li><a href="/bar/baz">BarBaz</a></ul>`

	doc, _ := html.Parse(strings.NewReader(s))
	traverse_html_node(doc, 0)

	z := html.NewTokenizer(strings.NewReader(s))
	traverse_html_tokenizer(z)

	z1 := html.NewTokenizer(strings.NewReader(s))
	traverse_html_token(z1)
}
func TokenizePage(r io.Reader) ([]string, string) {
	res := []string{}
	z := html.NewTokenizer(r)
	isTitle := false
	title := ""
loop:
	for {
		tt := z.Next()
		switch tt {
		case html.ErrorToken:
			break loop
		case html.TextToken:
			text := string(z.Text())
			if isTitle {
				title = cleanTitle(text)
				continue
			}
			res = append(res, bstrings.TokenizeWords(text)...)
		case html.EndTagToken:
			tn, _ := z.TagName()
			if string(tn) == "title" {
				isTitle = false
			}
		case html.StartTagToken:
			tn, _ := z.TagName()
			if string(tn) == "title" {
				isTitle = true
			}
		}
	}
	return res, title
}
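// Usage sketch for TokenizePage (hypothetical: the input string and the
// function name below are illustrative, and TokenizePage itself relies on
// the cleanTitle and bstrings.TokenizeWords helpers referenced above).
func ExampleTokenizePage() {
	page := `<html><head><title>My Page</title></head><body>hello tokenizer world</body></html>`
	words, title := TokenizePage(strings.NewReader(page))
	fmt.Println(title) // the cleaned <title> text
	fmt.Println(words) // the body text split into word tokens
}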
func FindLinks(body io.Reader) chan link {
	c := make(chan link)
	go func() {
		z := html.NewTokenizer(body)
		for {
			tt := z.Next()
			if tt == html.ErrorToken {
				break
			}
			if tt == html.StartTagToken {
				tn, _ := z.TagName()
				if len(tn) == 1 && tn[0] == 'a' {
					for {
						key, value, more := z.TagAttr()
						// http://stackoverflow.com/questions/14230145/what-is-the-best-way-to-convert-byte-array-to-string
						if string(key) == "href" {
							v := string(value)
							// http://codereview.stackexchange.com/questions/28386/fibonacci-generator-with-golang
							c <- link{v, v}
						}
						if !more {
							break
						}
					}
				}
			}
		}
		c <- link{"", ""}
	}()
	return c
}
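// FindLinks never closes its channel; it signals completion by sending a
// zero-value sentinel instead. A consumer sketch (the helper name is
// hypothetical, and link is assumed to be a comparable two-string struct,
// as the link{v, v} literals above suggest). Note that a plain range loop
// without the sentinel check would block forever:
func collectLinks(body io.Reader) []link {
	var out []link
	for l := range FindLinks(body) {
		if l == (link{}) {
			break // the producer goroutine has finished
		}
		out = append(out, l)
	}
	return out
}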
// getLinks parses the response for links, doing its best with bad HTML.
func getLinks(contents []byte) ([]*URL, error) {
	utf8Reader, err := charset.NewReader(bytes.NewReader(contents), "text/html")
	if err != nil {
		return nil, err
	}
	tokenizer := html.NewTokenizer(utf8Reader)
	var links []*URL
	tags := getIncludedTags()
	for {
		tokenType := tokenizer.Next()
		switch tokenType {
		case html.ErrorToken:
			//TODO: should use tokenizer.Err() to see if this is io.EOF
			// (meaning success) or an actual error
			return links, nil
		case html.StartTagToken:
			tagName, hasAttrs := tokenizer.TagName()
			if hasAttrs && tags[string(tagName)] {
				links = parseAnchorAttrs(tokenizer, links)
			}
		}
	}
}
// Search for
//   <head>
//     <meta http-equiv="X-XRDS-Location" content="....">
func findMetaXrdsLocation(input io.Reader) (location string, err error) {
	tokenizer := html.NewTokenizer(input)
	inHead := false
	for {
		tt := tokenizer.Next()
		switch tt {
		case html.ErrorToken:
			return "", tokenizer.Err()
		case html.StartTagToken, html.EndTagToken:
			tk := tokenizer.Token()
			if tk.Data == "head" {
				if tt == html.StartTagToken {
					inHead = true
				} else {
					return "", errors.New("Meta X-XRDS-Location not found")
				}
			} else if inHead && tk.Data == "meta" {
				ok := false
				content := ""
				for _, attr := range tk.Attr {
					if attr.Key == "http-equiv" && attr.Val == "X-XRDS-Location" {
						ok = true
					} else if attr.Key == "content" {
						content = attr.Val
					}
				}
				if ok && len(content) > 0 {
					return content, nil
				}
			}
		}
	}
}
func ExtractText(reader io.Reader, remover func(string) (string, error)) (string, error) {
	z := html.NewTokenizer(reader)
	var buf bytes.Buffer
	bodyBlock := false
loop:
	for {
		tokenType := z.Next()
		switch tokenType {
		case html.StartTagToken:
			if z.Token().DataAtom == atom.Body {
				bodyBlock = true
			}
		case html.EndTagToken:
			if z.Token().DataAtom == atom.Body {
				bodyBlock = false
			}
		case html.TextToken:
			if bodyBlock {
				buf.Write(z.Text())
			}
		case html.ErrorToken:
			if z.Err() != io.EOF {
				return "", z.Err()
			}
			break loop
		}
	}
	return remover(buf.String())
}
func html_detect_content_type(head []byte) string {
	reader := bytes.NewReader(head)
	z := html.NewTokenizer(reader)
	expect_html_root := true
FORBEGIN:
	for tt := z.Next(); tt != html.ErrorToken; tt = z.Next() {
		t := z.Token()
		switch {
		case t.Data == "meta" && (tt == html.StartTagToken || tt == html.SelfClosingTagToken):
			if ct, ok := detect_charset_by_token(t.Attr); ok {
				return ct
			}
		case t.Data == "head" && tt == html.EndTagToken:
			// No charset found before </head>; stop scanning.
			// (A bare break here would only exit the switch, so the
			// labeled break is needed to leave the loop.)
			break FORBEGIN
		case expect_html_root && (tt == html.StartTagToken || tt == html.SelfClosingTagToken):
			if t.Data == "html" {
				expect_html_root = false
			} else {
				// root element is not <html>: not an HTML file
				break FORBEGIN
			}
		}
	}
	return ""
}
// Returns the href attribute of a <link rel="shortcut icon"> tag or an error if not found.
func FindIcon(b []byte) (string, error) {
	r := bytes.NewReader(b)
	z := html.NewTokenizer(r)
	for {
		if z.Next() == html.ErrorToken {
			if err := z.Err(); err == io.EOF {
				break
			} else {
				return "", ErrNoIcon
			}
		}
		t := z.Token()
		switch t.DataAtom {
		case atom.Link:
			if t.Type == html.StartTagToken || t.Type == html.SelfClosingTagToken {
				attrs := make(map[string]string)
				for _, a := range t.Attr {
					attrs[a.Key] = a.Val
				}
				if attrs["rel"] == "shortcut icon" && attrs["href"] != "" {
					return attrs["href"], nil
				}
			}
		}
	}
	return "", ErrNoIcon
}
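// Hypothetical caller for FindIcon: fetch a page into memory, then scan it.
// Note that FindIcon matches the literal rel value "shortcut icon" only,
// not the bare rel="icon" form. The function name and URL are illustrative.
func printIcon(pageURL string) {
	resp, err := http.Get(pageURL)
	if err != nil {
		log.Fatal(err)
	}
	defer resp.Body.Close()
	b, err := ioutil.ReadAll(resp.Body)
	if err != nil {
		log.Fatal(err)
	}
	if href, err := FindIcon(b); err == nil {
		fmt.Println("icon at:", href)
	}
}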
func linkParser(page_chan chan string) <-chan string {
	link_chan := make(chan string)
	go func() {
		for page := range page_chan {
			page_bytes := bytes.NewBufferString(page)
			d := html.NewTokenizer(io.Reader(page_bytes))
			for {
				tokenType := d.Next()
				if tokenType == html.ErrorToken {
					fmt.Println("\nFinished parsing page")
					break
				}
				token := d.Token()
				switch tokenType {
				case html.StartTagToken:
					if strings.EqualFold(token.Data, "A") {
						for _, a := range token.Attr {
							if strings.EqualFold(a.Key, "HREF") {
								link_chan <- a.Val
							}
						}
					}
				}
			}
		}
		close(link_chan)
	}()
	return link_chan
}
func TestPushHTML(t *testing.T) {
	xmlns := NewXmlNamespace()
	for i := range xhtmlNsSamples {
		j := 0
		z := html.NewTokenizer(strings.NewReader(xhtmlNsSamples[i].sample))
		for {
			tt := z.Next()
			if tt == html.ErrorToken {
				err := z.Err()
				if err == io.EOF {
					break
				}
				t.Fatal(err)
			}
			switch tt {
			case html.StartTagToken, html.SelfClosingTagToken:
				xmlns.PushHTML(z.Token())
				checkState("push", j, xmlns, xhtmlNsSamples[i].prefix[j], xhtmlNsSamples[i].uri[j], t)
				j++
			case html.EndTagToken:
				j--
				checkState("pop", j, xmlns, xhtmlNsSamples[i].prefix[j], xhtmlNsSamples[i].uri[j], t)
				xmlns.Pop()
			}
		}
	}
}
func Sanitize(s string) (string, string) {
	r := bytes.NewReader([]byte(s))
	z := html.NewTokenizer(r)
	buf := &bytes.Buffer{}
	snip := &bytes.Buffer{}
	scripts := 0
	for {
		if z.Next() == html.ErrorToken {
			if err := z.Err(); err == io.EOF {
				break
			} else {
				return s, snipper(s)
			}
		}
		t := z.Token()
		if t.DataAtom == atom.Script {
			if t.Type == html.StartTagToken {
				scripts++
			} else if t.Type == html.EndTagToken {
				scripts--
			}
		} else if scripts == 0 {
			buf.WriteString(t.String())
			if t.Type == html.TextToken {
				snip.WriteString(t.String())
			}
		}
	}
	return buf.String(), snipper(snip.String())
}
func Autodiscover(b []byte) (string, error) {
	r := bytes.NewReader(b)
	z := html.NewTokenizer(r)
	inHtml := false
	inHead := false
	for {
		if z.Next() == html.ErrorToken {
			if err := z.Err(); err == io.EOF {
				break
			} else {
				return "", ErrNoRssLink
			}
		}
		t := z.Token()
		switch t.DataAtom {
		case atom.Html:
			inHtml = !inHtml
		case atom.Head:
			inHead = !inHead
		case atom.Link:
			if inHead && inHtml && (t.Type == html.StartTagToken || t.Type == html.SelfClosingTagToken) {
				attrs := make(map[string]string)
				for _, a := range t.Attr {
					attrs[a.Key] = a.Val
				}
				if attrs["rel"] == "alternate" && attrs["href"] != "" &&
					(attrs["type"] == "application/rss+xml" || attrs["type"] == "application/atom+xml") {
					return attrs["href"], nil
				}
			}
		}
	}
	return "", ErrNoRssLink
}
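// A quick sketch of Autodiscover on an inline document (the input and the
// function name are hypothetical; a real caller would pass a fetched body):
func ExampleAutodiscover() {
	page := []byte(`<html><head>
		<link rel="alternate" type="application/rss+xml" href="/feed.xml">
	</head><body></body></html>`)
	if href, err := Autodiscover(page); err == nil {
		fmt.Println(href) // "/feed.xml"
	}
}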
func GetAllLinks(data io.ReadCloser) (links []string, err error) {
	tokenizer := html.NewTokenizer(data)
	for {
		tokenizer.Next()
		token := tokenizer.Token()
		switch token.Type {
		case html.ErrorToken:
			return
		case html.EndTagToken:
		case html.CommentToken:
		case html.TextToken:
		case html.StartTagToken, html.SelfClosingTagToken:
			if *debug {
				log.Print("type ", token.Type)
				log.Print("data ", token.Data)
			}
			if token.Data == "a" {
				for _, a := range token.Attr {
					if a.Key == "href" {
						for _, ext := range strings.Split(*fileType, ",") {
							if strings.HasSuffix(a.Val, ext) {
								if strings.HasPrefix(a.Val, "//") {
									links = append(links, "http:"+a.Val)
								} else {
									links = append(links, a.Val)
								}
							}
						}
					}
				}
			}
		}
	}
}
// Given the HTML of a Goodreads bookshelf, returns the books.
func bookshelfToBooks(body io.ReadCloser) (books []Book) {
	z := html.NewTokenizer(body)
	books = make([]Book, 0, 100)
	for i := 0; i < 1000; {
		book := new(Book)
		tok := z.Next()
		// fmt.Println(tok)
		if tok == html.ErrorToken {
			// ...
			return books
		}
		_, atr, _ := z.TagAttr()
		if strings.Contains(string(atr), "/book/show") {
			_, atr, _ := z.TagAttr()
			book.title = string(atr)
			// fmt.Println("Got book:", book.title)
		} else if strings.Contains(string(atr), "staticStars") {
			_, atr, _ := z.TagAttr()
			book.rating = getRating(string(atr))
		}
		if book.title != "" {
			books = append(books, *book)
			i++
		}
	}
	return books
}
func Parse(reader io.Reader) (newPost *post.Post, err error) {
	newPost = &post.Post{}
	currentIdx := 0
	parsers := []post.PartParser{&ReceiverParser{}, &SenderParser{}, &SubjectParser{}, &PostDateParser{}, &ContentParser{}}
	linkParser := &LinkParser{}
	bodyBlock := false
	z := html.NewTokenizer(reader)
loop:
	for {
		tokenType := z.Next()
		switch tokenType {
		case html.StartTagToken:
			tk := z.Token()
			if tk.DataAtom == atom.Body {
				bodyBlock = true
			} else if tk.DataAtom == atom.A {
				for _, attr := range tk.Attr {
					if attr.Key == "href" {
						linkParser.Parse(newPost, []byte(attr.Val))
					}
				}
			}
		case html.EndTagToken:
			if z.Token().DataAtom == atom.Body {
				bodyBlock = false
			}
		case html.TextToken:
			if bodyBlock {
				flow := parsers[currentIdx].Parse(newPost, z.Text())
				switch flow {
				case post.Next:
					// Guard with len(parsers)-1 so currentIdx can never
					// index past the end of the slice.
					if currentIdx < len(parsers)-1 {
						currentIdx += 1
					}
				case post.Error:
					err = parsers[currentIdx].Err()
					break loop
				case post.Stop:
					break loop
				}
			}
		case html.ErrorToken:
			if z.Err() != io.EOF {
				err = z.Err()
			}
			break loop
		}
	}
	if currentIdx != len(parsers)-1 {
		err = errors.New("malformed Post format")
	}
	return
}
func findProviderFromHeadLink(input io.Reader) (opEndpoint, opLocalId string, err error) {
	tokenizer := html.NewTokenizer(input)
	inHead := false
	for {
		tt := tokenizer.Next()
		switch tt {
		case html.ErrorToken:
			// Even if the document is malformed after we found a
			// valid <link> tag, ignore and let's be happy with our
			// openid2.provider and potentially openid2.local_id as well.
			if len(opEndpoint) > 0 {
				return
			}
			return "", "", tokenizer.Err()
		case html.StartTagToken, html.EndTagToken:
			tk := tokenizer.Token()
			if tk.Data == "head" {
				if tt == html.StartTagToken {
					inHead = true
				} else {
					if len(opEndpoint) > 0 {
						return
					}
					return "", "", errors.New(
						"LINK with rel=openid2.provider not found")
				}
			} else if inHead && tk.Data == "link" {
				provider := false
				localId := false
				href := ""
				for _, attr := range tk.Attr {
					if attr.Key == "rel" {
						if attr.Val == "openid2.provider" {
							provider = true
						} else if attr.Val == "openid2.local_id" {
							localId = true
						}
					} else if attr.Key == "href" {
						href = attr.Val
					}
				}
				if provider && !localId && len(href) > 0 {
					opEndpoint = href
				} else if !provider && localId && len(href) > 0 {
					opLocalId = href
				}
			}
		}
	}
	// At this point we should probably have returned either from
	// a closing </head> or a tokenizer error (no </head> found).
	// But just in case.
	if len(opEndpoint) > 0 {
		return
	}
	return "", "", errors.New("LINK rel=openid2.provider not found")
}
func (c *Crawl) Scan(surl string) {
	//fmt.Printf("scanning %s\n", surl)
	resp := c.R.LaunchNoRead("GET", surl, "")
	if resp == nil || resp.Body == nil {
		//fmt.Println("nil response: " + surl)
		return
	}
	defer resp.Body.Close()
	page := html.NewTokenizer(resp.Body)
	for {
		tokenType := page.Next()
		if tokenType == html.ErrorToken {
			c.Crawled = append(c.Crawled, surl)
			return
		}
		token := page.Token()
		//if tokenType == html.StartTagToken { //&& token.DataAtom.String() == "a" {
		for _, attr := range token.Attr {
			if attr.Key == "href" || attr.Key == "action" || attr.Key == "src" {
				res := c.FixUrl(attr.Val)
				if res != "" && !c.IsRepeated(res) {
					oUrl, err := url.Parse(res)
					if err == nil {
						if oUrl.Host == c.Host {
							var test string
							idx := strings.LastIndex(oUrl.Path, ".")
							if idx >= 0 {
								//TODO: if the URL ends in a dot, this crashes with an out-of-range index
								oUrl.Path = oUrl.Path[0:idx] + "test1337" + oUrl.Path[idx+1:]
								test = oUrl.String()
							} else {
								test = res
							}
							//fmt.Printf("test:%s\n", test)
							_, code_not_found, _ := R.Get(test)
							html, code, _ := R.Get(res)
							if code != code_not_found {
								P.Show("c", code, len(html), res)
								c.Resources = append(c.Resources, res)
								c.NewResources = append(c.NewResources, res)
							}
						}
					}
				}
			}
		}
	}
}
func main() {
	urls := make([]string, 0, 75)
	resp, err := http.Get("http://opensource.org/licenses/alphabetical")
	if err != nil {
		fmt.Println(err)
		return
	}
	z := html.NewTokenizer(resp.Body)
	for {
		tok := z.Next()
		if tok == html.ErrorToken {
			//fmt.Println("reached error")
			break
		}
		if tok != html.StartTagToken {
			//fmt.Println("not a start tag")
			continue
		}
		tagName, hasAttr := z.TagName()
		if string(tagName) != "a" {
			//fmt.Println(string(tagName), " is not 'a'")
			continue
		}
		if !hasAttr {
			//fmt.Println("tag has no attributes")
			continue
		}
		href := ""
		for {
			attr, val, more := z.TagAttr()
			if string(attr) == "href" {
				//fmt.Println("Found href: ", string(val))
				href = string(val)
			}
			if !more {
				break
			}
		}
		if strings.HasPrefix(href, "/licenses/") {
			href = strings.Replace(href, "/licenses/", "", 1)
			if href == strings.ToLower(href) {
				continue
			}
			urls = append(urls, href)
		}
	}
	for _, license := range urls {
		getLicense(license)
	}
}
// parse parses a string and converts it into an htmlDocument.
func parse(s string) *htmlDocument {
	htmlDoc := &htmlDocument{}
	tokenizer := html.NewTokenizer(strings.NewReader(s))
	for {
		if errorToken, _, _ := parseToken(tokenizer, htmlDoc, nil); errorToken {
			break
		}
	}
	return htmlDoc
}
func TestXMLBasePushHTML(t *testing.T) {
	for i, v := range xmlBaseTests {
		xmlbase, err := NewXmlBase("")
		if err != nil {
			t.Fatal(i, err)
		}
		if verbose {
			fmt.Println(i, "created", xmlbase.baseUri, xmlbase.depth)
		}
		z := html.NewTokenizer(strings.NewReader(v.example))
		r := 0
	loop:
		for {
			tt := z.Next()
			switch tt {
			case html.ErrorToken:
				err = z.Err()
				if err == io.EOF {
					// End of this example; a plain return here would skip
					// the remaining test cases, so break to the outer loop.
					break loop
				}
				t.Fatal(i, err)
			case html.StartTagToken:
				node := z.Token()
				xmlbase.PushHTML(node)
				if verbose {
					fmt.Println(i, "pushed", xmlbase.baseUri, xmlbase.depth)
				}
				for _, attr := range node.Attr {
					if attr.Key == v.resolve[r].html.Key {
						if verbose {
							fmt.Println(i, "verify", attr, v.resolve[r].iri)
						}
						iri, err := xmlbase.Resolve(attr.Val)
						if err != nil {
							t.Fatal(i, r, err)
						}
						if iri != v.resolve[r].iri {
							t.Fatalf("%d %d expected '%s', got '%s'", i, r, v.resolve[r].iri, iri)
						}
						r++
					}
				}
			case html.EndTagToken:
				xmlbase.Pop()
				if verbose {
					fmt.Println(i, "popped", xmlbase.baseUri, xmlbase.depth)
				}
			}
		}
	}
}
func fetch_description(num int) (n string, err error) {
	n = "ok"
	url := fmt.Sprintf("http://projecteuler.net/problem=%d", num)
	resp, err := http.Get(url)
	if err != nil {
		fmt.Printf("Error fetching description: %v", err.Error())
		return n, err
	}
	// Close the body on every exit path; the original closed it only in
	// unreachable code after the loop, leaking the connection.
	defer resp.Body.Close()
	var desc bytes.Buffer
	z := html.NewTokenizer(resp.Body)
	in_desc := false
	desc_depth := 0
	depth := 0
	for {
		tt := z.Next()
		switch tt {
		case html.ErrorToken:
			fmt.Printf("returning ErrorToken, captured %v", desc.String())
			return desc.String(), err
		case html.TextToken:
			if in_desc {
				desc.Write(z.Text())
			}
		case html.StartTagToken, html.EndTagToken:
			tn, _ := z.TagName()
			if string(tn) == "div" {
				if tt == html.StartTagToken {
					depth++
					key, val, _ := z.TagAttr()
					if string(key) == "class" && string(val) == "problem_content" {
						in_desc = true
						desc_depth = depth
					}
				} else {
					depth--
					if in_desc && depth < desc_depth {
						return desc.String(), err
					}
				}
			}
		}
	}
}
// Gets latest version numbers of vim plugins from vim.org
// Takes one argument, the ID of the script on vim.org
// Returns a string with the version, and an error (if any)
func getVersionFromVimDotOrg(scriptID string) (string, error) {
	url := "http://www.vim.org/scripts/script.php?script_id=" + scriptID
	resp, err := http.Get(url)
	if err != nil {
		return "", err
	}
	defer resp.Body.Close()
	tokenizer := html.NewTokenizer(resp.Body)

	// vim.org doesn't annotate its HTML elements very well,
	// so we use this variable to keep track of which column in the table we are looking at.
	// Version #'s are in the second column.
	columnInDataTable := 0

	// This loop exits when we find the version, or the tokenizer runs out of input
	for {
		tokenType := tokenizer.Next()
		switch tokenType {
		case html.ErrorToken:
			// We either can't parse the HTML, or we're done.
			// In either case, we haven't found a good version.
			return "", tokenizer.Err()
		case html.StartTagToken:
			token := tokenizer.Token()
			// If this is a table data cell, it might be part of the data table
			if token.DataAtom == atom.Lookup([]byte("td")) {
				for _, attribute := range token.Attr {
					// If this is annotated with class=rowodd or roweven, this is a field in the data table
					if attribute.Key == "class" && (strings.Contains(attribute.Val, "rowodd") || strings.Contains(attribute.Val, "roweven")) {
						// We have seen one more field in the data table
						columnInDataTable++
					}
				}
			}
		case html.EndTagToken:
			// If this is the end of a table row, we reset the number of data fields seen
			if tokenizer.Token().DataAtom == atom.Lookup([]byte("tr")) {
				columnInDataTable = 0
			}
		case html.TextToken:
			token := tokenizer.Token()
			// If this is the second column in the table, it is the version column.
			// Because vim.org sorts the data table with the most recent version at the top,
			// we can return the first version we find, as it must be the most recent.
			if columnInDataTable == 2 && strings.TrimSpace(token.String()) != "" {
				return token.String(), nil
			}
		}
	}
}
// parseHtml receives a complete HTML page and puts a map of words and (many)
// individual links into the corresponding channels.
func parseHtml(a HTTPRESP) {
	//start := time.Now()
	d := html.NewTokenizer(a.FD)
	words := make(map[string]int)
	for {
		// token type
		tokenType := d.Next()
		// an ErrorToken is (also) emitted at the end of the data
		if tokenType == html.ErrorToken {
			chan_urlindexes <- URLINDEX{a.URL, words} // put the word map into the channel
			//fmt.Printf("Parse duration: [%.2fs] URL: %s\n", time.Since(start).Seconds(), a.URL)
			return
		}
		token := d.Token()
		switch tokenType {
		case html.StartTagToken: // <tag>
			// find links
			if token.Data == "a" {
				for _, element := range token.Attr {
					if element.Key == "href" {
						// normalize the link
						ref_url, err := url.Parse(element.Val)         // parsed URL
						base_url, _ := url.Parse(a.URL)                // base URL of the parsed page
						comp_url := base_url.ResolveReference(ref_url) // resolved URL, or ref_url itself if it is already absolute
						// only links that are not yet in the global link map
						if err == nil && comp_url.Scheme == "http" && crwldurls[comp_url.String()] != true && a.LINKDEPTH < MaxLinkDepth {
							crwldurls[comp_url.String()] = true                  // add the URL to the global URL list so it is not queued again
							chan_urls <- URL{comp_url.String(), a.LINKDEPTH + 1} // put the URL into the channel and increment the link depth
						}
					}
				}
			}
		case html.TextToken: // text between start and end tag
			// build the word map
			temp := strings.Fields(token.Data) // split into individual words at whitespace
			for _, element := range temp {
				//TODO: separate words from punctuation more cleanly, e.g. with strings.Trim()
				words[element] = words[element] + 1
			}
			//fmt.Printf("%q\n", temp)
		case html.EndTagToken: // </tag>
		case html.SelfClosingTagToken: // <tag/>
		}
	}
}
func (p *Post) Clean() string {
	z := html.NewTokenizer(strings.NewReader(string(p.HTML)))
	var buffer bytes.Buffer
loop:
	for {
		switch tt := z.Next(); tt {
		case html.ErrorToken:
			break loop
		case html.TextToken:
			buffer.Write(z.Text())
		}
	}
	return string(bytes.TrimSpace(ws.ReplaceAll(buffer.Bytes(), []byte{' '})))
}
// Clean returns the sanitized HTML (based on a tag and attribute whitelist) and
// the text contents of s. Links are made relative to u, if non-nil.
func Clean(s string, u *url.URL) (string, string) {
	r := bytes.NewReader([]byte(strings.TrimSpace(s)))
	z := html.NewTokenizer(r)
	buf := &bytes.Buffer{}
	strip := &bytes.Buffer{}
	skip := 0
	if u != nil {
		u.RawQuery = ""
		u.Fragment = ""
	}
	for {
		if z.Next() == html.ErrorToken {
			if err := z.Err(); err == io.EOF {
				break
			} else {
				return s, s
			}
		}
		t := z.Token()
		if t.Type == html.StartTagToken || t.Type == html.SelfClosingTagToken {
			if !AcceptableElements[t.Data] {
				if UnacceptableElementsWithEndTag[t.Data] && t.Type != html.SelfClosingTagToken {
					skip += 1
				}
			} else {
				cleanAttributes(u, &t)
				buf.WriteString(t.String())
			}
		} else if t.Type == html.EndTagToken {
			if !AcceptableElements[t.Data] {
				if UnacceptableElementsWithEndTag[t.Data] {
					skip -= 1
				}
			} else {
				buf.WriteString(t.String())
			}
		} else if skip == 0 {
			buf.WriteString(t.String())
			if t.Type == html.TextToken {
				strip.WriteString(t.String())
			}
		}
	}
	return buf.String(), strip.String()
}
func main() {
	start := "http://www.panynj.gov/path/full-schedules.html"
	res, err := http.Get(start)
	if err != nil {
		log.Fatal(err)
	}
	z := html.NewTokenizer(res.Body)
	for {
		tt := z.Next()
		if tt == html.ErrorToken {
			break
		}
		if tt == html.StartTagToken {
			tn, _ := z.TagName()
			if len(tn) == 1 && tn[0] == 'a' {
				for {
					key, value, more := z.TagAttr()
					// http://stackoverflow.com/questions/14230145/what-is-the-best-way-to-convert-byte-array-to-string
					if string(key) == "href" {
						v := string(value)
						if strings.HasPrefix(v, "schedules/") {
							// yep, hack it: path.Join collapses "http://" to
							// "http:/", so put the second slash back
							// thx go for making me rename the variable
							fuckedurl := path.Join(path.Dir(start), v)
							url := strings.Replace(fuckedurl, ":/", "://", 1)
							fmt.Printf("%s\n", url)
							fetch(url)
						}
					}
					if !more {
						break
					}
				}
			}
		}
	}
	res.Body.Close()
}
func _parseHTML(r io.Reader, ch chan<- resource) {
	defer close(ch)
	z := html.NewTokenizer(r)
	findAttr := func(name string) string {
		lname := strings.ToLower(name)
		moreAttr := true
		for moreAttr {
			var key, val []byte
			key, val, moreAttr = z.TagAttr()
			if strings.ToLower(string(key)) == lname {
				return strings.Split(string(val), "#")[0]
			}
		}
		return ""
	}
	for {
		tokenType := z.Next()
		switch tokenType {
		case html.ErrorToken:
			return
		case html.StartTagToken, html.SelfClosingTagToken:
			tagName, hasAttr := z.TagName()
			if !hasAttr {
				continue
			}
			ltag := strings.ToLower(string(tagName))
			attrName, ok := attrNameMap[ltag]
			if !ok {
				continue
			}
			if attr := findAttr(attrName); attr != "" {
				ch <- resource{ltag, attr}
			}
		}
	}
}
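// Consumer sketch for _parseHTML (the helper name is hypothetical; resource
// is assumed to be a two-string struct of tag and attribute value, and
// attrNameMap is assumed to map e.g. "img" to "src"). Because the parser
// closes ch itself, a plain range terminates cleanly; contrast this with the
// sentinel protocol FindLinks uses above.
func collectResources(r io.Reader) []resource {
	ch := make(chan resource)
	go _parseHTML(r, ch)
	var out []resource
	for res := range ch {
		out = append(out, res)
	}
	return out
}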
func ParseLink(reader io.Reader) []string {
	links := make([]string, 0)
	page := html.NewTokenizer(reader)
	for {
		tokenType := page.Next()
		if tokenType == html.ErrorToken {
			return links
		}
		token := page.Token()
		if tokenType == html.StartTagToken && token.DataAtom.String() == "a" {
			for _, attr := range token.Attr {
				if attr.Key == "href" {
					links = append(links, attr.Val)
				}
			}
		}
	}
}
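// Minimal usage sketch for ParseLink on an inline document (the input and
// function name are illustrative):
func ExampleParseLink() {
	links := ParseLink(strings.NewReader(`<p><a href="/a">A</a> and <a href="/b">B</a></p>`))
	fmt.Println(links) // [/a /b]
}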
func Extract(r io.Reader, extractor Extractor) error {
	z := html.NewTokenizer(r)
	for {
		tt := z.Next()
		switch tt {
		case html.ErrorToken:
			switch z.Err() {
			case io.EOF:
				return nil
			default:
				return z.Err()
			}
		default:
			token := z.Token()
			extractor.HandleToken(token)
		}
	}
}
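// Extract is generic over an Extractor; a minimal implementation sketch,
// assuming the interface is simply:
//
//	type Extractor interface{ HandleToken(html.Token) }
//
// anchorCollector is a hypothetical example type that gathers every href it
// sees. Usage: c := &anchorCollector{}; err := Extract(r, c); c.hrefs then
// holds the collected links.
type anchorCollector struct{ hrefs []string }

func (c *anchorCollector) HandleToken(t html.Token) {
	if t.Type == html.StartTagToken && t.DataAtom == atom.A {
		for _, a := range t.Attr {
			if a.Key == "href" {
				c.hrefs = append(c.hrefs, a.Val)
			}
		}
	}
}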
func ParseWeather(r io.Reader) []string {
	res := []string{}
	z := html.NewTokenizer(r)
	inTablePluie := false
	candidateText := false
	horaire := ""
loop:
	for {
		tt := z.Next()
		switch tt {
		case html.ErrorToken:
			break loop
		case html.TextToken:
			if candidateText {
				text := strings.TrimSpace(string(z.Text()))
				if text != "" {
					if horaire == "" {
						horaire = text
					} else {
						res = append(res, fmt.Sprintf("%s : %s", horaire, text))
						horaire = ""
					}
				}
			}
		case html.EndTagToken:
			candidateText = false
			if hasTablPluieClass(z) {
				return res
			}
		case html.StartTagToken:
			if !inTablePluie && hasTablPluieClass(z) {
				inTablePluie = true
			} else if inTablePluie {
				tn, _ := z.TagName()
				candidateText = string(tn) == "td"
			}
		}
	}
	return res
}