func (bsState *BsState) evaluateHtml(strUrl string, v interface{}) (*BsResult, error) { switch tv := v.(type) { case io.Reader: words, title := html.TokenizePage(tv) score := bsState.EvaluateBs(words) return &BsResult{title, score}, nil case string: parsedUrl, err := url.Parse(tv) if err != nil { return nil, err } var pageContent []byte if parsedUrl.Scheme == "http" || parsedUrl.Scheme == "https" { pageContent, err = html.DownloadPage(tv) } else { pageContent, err = ioutil.ReadFile(tv) } if err != nil { return nil, err } r := bytes.NewReader(pageContent) return bsState.evaluateHtml(strUrl, r) default: return nil, errors.New(fmt.Sprintf("Unhandled parameter %q", v)) } }
func (bsState *BsState) trainWithHtmlFile(filename string, bs bool) { file, err := os.Open(filename) if err != nil { log.Printf("Error on opening file %s", err) return } words, _ := html.TokenizePage(file) bsState.enlargeCorpus(words, bs) }
func (bsState *BsState) trainWithPageContent(content []byte, bs bool) { words, _ := html.TokenizePage(bytes.NewReader(content)) bsState.enlargeCorpus(words, bs) }