func (bsState *BsState) processTrainQuery(query BsQuery) ([]string, error) { storage := bsState.getStorage(query.Bs) processed := []string{} for _, strUrl := range query.Urls { pageContent, err := html.DownloadPage(strUrl) if err != nil { return []string{}, err } parsedUrl, err := url.Parse(strUrl) if err != nil { return []string{}, err } processed = append(processed, strUrl) if isPdf(strUrl) { textPdf := savePdfToText(parsedUrl, pageContent, storage) bsState.trainWithTextFile(textPdf, query.Bs) } else { saveUrl(parsedUrl, pageContent, storage) bsState.trainWithPageContent(pageContent, query.Bs) } } if query.Phrase != "" { dest := bsState.getPhraseStorage(query.Bs) appendPhrase(query.Phrase, dest) bsState.trainWithPhrase(query.Phrase, query.Bs) } return processed, nil }
func (bsState *BsState) evaluateHtml(strUrl string, v interface{}) (*BsResult, error) { switch tv := v.(type) { case io.Reader: words, title := html.TokenizePage(tv) score := bsState.EvaluateBs(words) return &BsResult{title, score}, nil case string: parsedUrl, err := url.Parse(tv) if err != nil { return nil, err } var pageContent []byte if parsedUrl.Scheme == "http" || parsedUrl.Scheme == "https" { pageContent, err = html.DownloadPage(tv) } else { pageContent, err = ioutil.ReadFile(tv) } if err != nil { return nil, err } r := bytes.NewReader(pageContent) return bsState.evaluateHtml(strUrl, r) default: return nil, errors.New(fmt.Sprintf("Unhandled parameter %q", v)) } }
func FetchWeatherFromUrl(url string) ([]string, error) { contents, err := uhtml.DownloadPage(url) if err != nil { return []string{}, err } return ParseWeather(bytes.NewReader(contents)), nil }
func (bsState *BsState) evaluatePdf(strUrl string) (*BsResult, error) { parsedUrl, err := url.Parse(strUrl) pdfFile := strUrl if parsedUrl.Scheme == "http" || parsedUrl.Scheme == "https" { content, err := html.DownloadPage(parsedUrl.String()) if err != nil { return nil, err } pdfFile = savePdfToText(parsedUrl, content, "/tmp/temp_pdf") } r, err := os.Open(pdfFile) if err != nil { return nil, err } return bsState.evaluatePhrase(r) }