// initByHostName registers hostName with the manager.
//
// The steps run in order and the first failure aborts the call:
//  1. m.resolveURL(hostName) yields the base URL;
//  2. m.readRobotTxt(hostName) yields the robots.txt status code and body;
//  3. the status/body pair is parsed with robotstxt.FromStatusAndBytes, and a
//     parse failure is reported through werrors.NewDetails with
//     ErrCreateRobotsTxtFromURL;
//  4. the host is stored via db.AddHost.
//
// Only after db.AddHost succeeds are m.hosts and m.robotsTxt updated, the
// latter with the "Googlebot" group of the parsed robots.txt. Both maps must
// already be initialised by the caller.
func (m *hostsManager) initByHostName(db proxy.DbHost, hostName string) error {
	baseURL, err := m.resolveURL(hostName)
	if err != nil {
		return err
	}

	code, data, err := m.readRobotTxt(hostName)
	if err != nil {
		return err
	}

	parsed, err := robotstxt.FromStatusAndBytes(code, data)
	if err != nil {
		return werrors.NewDetails(ErrCreateRobotsTxtFromURL, err)
	}

	id, err := db.AddHost(proxy.NewHost(hostName, code, data), baseURL)
	if err != nil {
		return err
	}

	// Cache the new host only once it has been persisted.
	m.hosts[hostName] = id
	m.robotsTxt[id] = parsed.FindGroup("Googlebot")

	return nil
}
// GetHosts is the test double for the host listing query.
//
// Behaviour is driven by two fields of the fake:
//   - getHostErr, when non-empty, makes the call fail with that text as the
//     error message (an empty, non-nil map is still returned);
//   - robotTxtData, when non-empty, yields a single host with ID 1, name
//     "hostName", status 200 and that data as its robots.txt body.
//
// With both fields empty the result is an empty map and a nil error.
func (f *fakeDbHost) GetHosts() (map[int64]*proxy.Host, error) {
	hosts := map[int64]*proxy.Host{}

	switch {
	case f.getHostErr != "":
		return hosts, errors.New(f.getHostErr)
	case f.robotTxtData != "":
		hosts[1] = proxy.NewHost("hostName", 200, []byte(f.robotTxtData))
	}

	return hosts, nil
}
Exemple #3
0
// GetHosts loads every row of the 'Host' table and returns the hosts keyed by
// their row ID.
//
// On a query failure it returns an empty (non-nil) map together with an error
// that wraps the underlying database error, so callers can still inspect the
// cause with errors.Is / errors.As.
func (db *DBrw) GetHosts() (map[int64]*proxy.Host, error) {
	var hosts []database.Host
	if err := db.Find(&hosts).Error; err != nil {
		// %w renders the same message text as %s/%v while keeping err in the
		// error chain.
		return make(map[int64]*proxy.Host), fmt.Errorf("Get hosts list from db, message: %w", err)
	}

	// The row count is known here, so the map can be sized once.
	result := make(map[int64]*proxy.Host, len(hosts))
	for i := range hosts {
		host := &hosts[i] // index instead of ranging by value to avoid copying each row
		result[host.ID] = proxy.NewHost(host.Name, host.RobotsStatusCode, host.RobotsData)
	}

	return result, nil
}
// TestInitByHostName ...
func TestInitByHostName(t *testing.T) {
	Convey("Success", t, func() {
		robotstxtBody := []byte("User-agent: *")
		ts := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
			_, _ = w.Write(robotstxtBody)
		}))
		defer ts.Close()

		var hostID int64
		hostID = 1
		baseURL := ts.URL
		parsedURL, err := url.Parse(baseURL)
		So(err, ShouldBeNil)

		h := &hostsManager{
			robotsTxt: make(map[int64]*robotstxt.Group),
			hosts:     make(map[string]int64)}
		db := &fakeDbHost{}
		hostName := parsedURL.Host
		hostsExpected := make(map[string]int64)
		hostsExpected[hostName] = hostID
		robot, err := robotstxt.FromStatusAndBytes(200, robotstxtBody)
		So(err, ShouldBeNil)
		robotsTxtExpected := make(map[int64]*robotstxt.Group)
		robotsTxtExpected[hostID] = robot.FindGroup("Googlebot")
		host := proxy.NewHost(hostName, 200, robotstxtBody)

		err = h.initByHostName(db, hostName)
		So(err, ShouldBeNil)
		So(h.hosts, ShouldResemble, hostsExpected)
		So(h.robotsTxt, ShouldResemble, robotsTxtExpected)
		So(db.host, ShouldResemble, host)
		So(db.baseURL, ShouldEqual, baseURL)
	})

	Convey("Failed resolve base URL by status code", t, func() {
		ts := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
			http.Error(w, "error status", 500)
		}))
		defer ts.Close()

		parsedURL, err := url.Parse(ts.URL)
		So(err, ShouldBeNil)

		h := &hostsManager{}
		db := &fakeDbHost{}

		err = h.initByHostName(db, parsedURL.Host)
		So(err, ShouldNotBeNil)
		So(err.Error(), ShouldEqual, ErrResolveBaseURL)
	})

	Convey("Failed read robot txt", t, func() {
		ts := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
			if r.URL.Path != "/" {
				http.Redirect(w, r, "/error", http.StatusFound)
			}
		}))
		defer ts.Close()

		parsedURL, err := url.Parse(ts.URL)
		So(err, ShouldBeNil)

		h := &hostsManager{}
		db := &fakeDbHost{}

		err = h.initByHostName(db, parsedURL.Host)
		So(err, ShouldNotBeNil)
		So(err.Error(), ShouldEqual, ErrGetRequest)
	})

	Convey("Failed create robots txt", t, func() {
		robotstxtBody := []byte("Disallow:without_user_agent")
		ts := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
			_, _ = w.Write(robotstxtBody)
		}))
		defer ts.Close()

		parsedURL, err := url.Parse(ts.URL)
		So(err, ShouldBeNil)

		h := &hostsManager{}
		db := &fakeDbHost{}

		err = h.initByHostName(db, parsedURL.Host)
		So(err, ShouldNotBeNil)
		So(err.Error(), ShouldEqual, ErrCreateRobotsTxtFromURL)
	})
}