Пример #1
0
// evaluateHtml tokenizes an HTML page and scores its words with EvaluateBs.
//
// v selects where the page comes from:
//   - io.Reader: the page is read directly from the reader.
//   - string: the value is parsed with url.Parse; when its scheme is "http"
//     or "https" the page is fetched with html.DownloadPage, otherwise the
//     string is treated as a local file path and read with ioutil.ReadFile.
//
// Any other dynamic type (including a nil interface) yields an error naming
// that type. strUrl is not consulted by this function; it is kept so the
// signature stays compatible with existing callers.
//
// On success it returns a BsResult built from the title reported by
// html.TokenizePage and the score; on failure the result is nil and the
// error comes from URL parsing, downloading or file reading.
func (bsState *BsState) evaluateHtml(strUrl string, v interface{}) (*BsResult, error) {
	var page io.Reader

	switch tv := v.(type) {
	case io.Reader:
		page = tv
	case string:
		parsedUrl, err := url.Parse(tv)
		if err != nil {
			return nil, err
		}

		var pageContent []byte
		if parsedUrl.Scheme == "http" || parsedUrl.Scheme == "https" {
			pageContent, err = html.DownloadPage(tv)
		} else {
			// Anything that is not an http(s) URL is assumed to be a path on disk.
			pageContent, err = ioutil.ReadFile(tv)
		}
		if err != nil {
			return nil, err
		}
		page = bytes.NewReader(pageContent)
	default:
		// %T reports the dynamic type; %q on an arbitrary value produced
		// unreadable output such as "%!q(<nil>)".
		return nil, errors.New(fmt.Sprintf("Unhandled parameter type %T", v))
	}

	words, title := html.TokenizePage(page)
	score := bsState.EvaluateBs(words)
	return &BsResult{title, score}, nil
}
Пример #2
0
// trainWithHtmlFile opens the HTML file at filename, tokenizes it with
// html.TokenizePage and hands the resulting words to enlargeCorpus together
// with the bs flag. The second value returned by TokenizePage is discarded.
//
// If the file cannot be opened, the error is logged and the corpus is left
// unchanged; nothing is reported back to the caller.
func (bsState *BsState) trainWithHtmlFile(filename string, bs bool) {
	file, err := os.Open(filename)
	if err != nil {
		log.Printf("Error on opening file %s", err)
		return
	}
	// Release the file descriptor once tokenizing is finished; without this
	// every training call leaked an open file.
	defer file.Close()

	words, _ := html.TokenizePage(file)
	bsState.enlargeCorpus(words, bs)
}
Пример #3
0
// trainWithPageContent tokenizes an in-memory HTML page and adds its words to
// the corpus via enlargeCorpus, forwarding the bs flag unchanged. The second
// value returned by html.TokenizePage is discarded.
func (bsState *BsState) trainWithPageContent(content []byte, bs bool) {
	reader := bytes.NewReader(content)
	tokens, _ := html.TokenizePage(reader)
	bsState.enlargeCorpus(tokens, bs)
}