Пример #1
0
// processTrainQuery downloads every URL listed in query.Urls, stores the
// downloaded content in the storage returned by getStorage for query.Bs, and
// trains on it. URLs for which isPdf reports true go through savePdfToText and
// trainWithTextFile; all others go through saveUrl and trainWithPageContent.
// When query.Phrase is non-empty it is appended to the phrase storage and
// trained on as well.
//
// It returns the URLs that were handled. On the first download or URL parse
// failure it stops and returns an empty slice together with that error; URLs
// handled in earlier iterations have already been saved and trained on by then.
func (bsState *BsState) processTrainQuery(query BsQuery) ([]string, error) {
	store := bsState.getStorage(query.Bs)
	done := make([]string, 0)

	for _, rawURL := range query.Urls {
		body, err := html.DownloadPage(rawURL)
		if err != nil {
			return []string{}, err
		}
		target, err := url.Parse(rawURL)
		if err != nil {
			return []string{}, err
		}
		done = append(done, rawURL)

		if !isPdf(rawURL) {
			saveUrl(target, body, store)
			bsState.trainWithPageContent(body, query.Bs)
			continue
		}
		textFile := savePdfToText(target, body, store)
		bsState.trainWithTextFile(textFile, query.Bs)
	}

	if phrase := query.Phrase; phrase != "" {
		appendPhrase(phrase, bsState.getPhraseStorage(query.Bs))
		bsState.trainWithPhrase(phrase, query.Bs)
	}
	return done, nil
}
Пример #2
0
// evaluateHtml scores an HTML page and returns a BsResult built from the page
// title and the score computed by EvaluateBs.
//
// v selects where the page comes from:
//   - io.Reader: the page is tokenized directly from the reader.
//   - string: a location; http and https URLs are fetched with
//     html.DownloadPage, anything else is read as a local file path. The
//     content is then evaluated through the io.Reader case.
//
// Any other type yields an error naming the offending type. strUrl is not
// inspected here; it is only forwarded on the recursive call.
func (bsState *BsState) evaluateHtml(strUrl string, v interface{}) (*BsResult, error) {
	switch tv := v.(type) {
	case io.Reader:
		words, title := html.TokenizePage(tv)
		score := bsState.EvaluateBs(words)
		return &BsResult{title, score}, nil
	case string:
		parsedUrl, err := url.Parse(tv)
		if err != nil {
			return nil, err
		}
		var pageContent []byte
		if parsedUrl.Scheme == "http" || parsedUrl.Scheme == "https" {
			pageContent, err = html.DownloadPage(tv)
		} else {
			pageContent, err = ioutil.ReadFile(tv)
		}
		if err != nil {
			return nil, err
		}
		r := bytes.NewReader(pageContent)
		return bsState.evaluateHtml(strUrl, r)
	default:
		// This branch is reached only for values that are neither a string
		// nor an io.Reader, so the type is the useful diagnostic; %q would
		// render ints as character literals and nil as "%!q(<nil>)".
		return nil, errors.New(fmt.Sprintf("Unhandled parameter of type %T", v))
	}
}
Пример #3
0
// FetchWeatherFromUrl downloads the page at url with uhtml.DownloadPage and
// returns the result of running ParseWeather over its contents. If the
// download fails it returns an empty slice and the download error.
func FetchWeatherFromUrl(url string) ([]string, error) {
	page, err := uhtml.DownloadPage(url)
	if err != nil {
		return []string{}, err
	}
	reader := bytes.NewReader(page)
	weather := ParseWeather(reader)
	return weather, nil
}
Пример #4
0
// evaluatePdf evaluates the PDF identified by strUrl and returns the result of
// evaluatePhrase over its text.
//
// When strUrl has an http or https scheme, the document is downloaded with
// html.DownloadPage and handed to savePdfToText (using "/tmp/temp_pdf" as the
// storage argument); the path it returns is the file that gets opened.
// Otherwise strUrl itself is opened as a local file path.
//
// It returns an error if strUrl cannot be parsed, the download fails, or the
// file cannot be opened.
func (bsState *BsState) evaluatePdf(strUrl string) (*BsResult, error) {
	parsedUrl, err := url.Parse(strUrl)
	if err != nil {
		// url.Parse returns a nil *URL on failure; reading parsedUrl.Scheme
		// without this check would panic.
		return nil, err
	}
	pdfFile := strUrl
	if parsedUrl.Scheme == "http" || parsedUrl.Scheme == "https" {
		content, err := html.DownloadPage(parsedUrl.String())
		if err != nil {
			return nil, err
		}
		pdfFile = savePdfToText(parsedUrl, content, "/tmp/temp_pdf")
	}
	r, err := os.Open(pdfFile)
	if err != nil {
		return nil, err
	}
	// Release the file descriptor once evaluation is done.
	// NOTE(review): assumes evaluatePhrase finishes reading r before it
	// returns — confirm against its implementation.
	defer r.Close()
	return bsState.evaluatePhrase(r)
}