Example #1
0
// main starts the spider service. It parses the command-line flags, builds
// the spider from the gatekeeper and caregiver addresses, registers it on an
// RPC server, runs the pusher and puller loops in background goroutines and
// then serves requests on the configured port.
func main() {
	var help = flag.Bool("help", false, "print help")
	var port = flag.Int("port", -1, "port to listen")
	var gkArrd = flag.String("gatekeeper", "", "gatekeeper address")
	var cgAddr = flag.String("caregiver", "", "caregiver address")
	var vint = flag.Int("interval", 1, "sleep interval")
	var pushCnt = flag.Int("push-cnt", 10, "urls to push to the caregiver at a time")
	var gracefulRestart = graceful.SetFlag()
	flag.Parse()

	// The port and both peer addresses are mandatory. A negative push count
	// is rejected as well: converting it to uint below would silently wrap
	// around to a huge value.
	if *help || *port == -1 || *gkArrd == "" || *cgAddr == "" || *pushCnt < 0 {
		flag.PrintDefaults()
		return
	}

	// The -interval flag is expressed in seconds.
	interval := time.Duration(*vint) * time.Second

	sp, err := spider.NewSpider(*gkArrd, *cgAddr, interval, uint(*pushCnt))
	if err != nil {
		log.Fatal(err)
	}

	srv := rpc.NewServer()
	// A failed registration would leave the server without any methods, so
	// treat it as fatal instead of ignoring it.
	if err := srv.Register(&spider.SpiderServer{sp}); err != nil {
		log.Fatal(err)
	}

	server := gjsonrpc.NewServer(srv)
	graceful.SetSighup(server)

	// Both workers are restarted whenever they return. After an error we
	// wait one interval before retrying so that a persistent failure does
	// not turn into a busy loop flooding the log.
	go func() {
		for {
			if err := sp.RunPusher(); err != nil {
				log.Errorln(err)
				time.Sleep(interval)
			}
		}
	}()

	go func() {
		for {
			if err := sp.RunPuller(); err != nil {
				log.Errorln(err)
				time.Sleep(interval)
			}
		}
	}()

	if err := server.ListenAndServe(":"+strconv.Itoa(*port), *gracefulRestart); err != nil {
		log.Fatal(errors.NewErr(err))
	}

	// Reached only when ListenAndServe returned without an error.
	if err := graceful.Restart(server); err != nil {
		log.Fatal(err)
	}
}
Example #2
0
// PushUrls forwards args.Urls to the wrapped Caregiver. A failure is logged
// together with the request arguments and handed back to the caller; result
// is never written.
func (self *CaregiverServer) PushUrls(args *Args, result *struct{}) error {
	if pushErr := self.Caregiver.PushUrls(args.Urls); pushErr != nil {
		log.Errorln(pushErr, args)
		return pushErr
	}
	return nil
}
Example #3
0
// Write derives the storage key for args.Url with UrlTransform and asks the
// wrapped Gatekeeper to write args.Body under it. On success *result is
// replaced by a FindResult pointing at the value returned by the Gatekeeper.
// Any error is logged with the request arguments and returned unchanged.
func (self *GatekeeperServer) Write(args *WriteArgs, result *FindResult) error {
	urlKey, keyErr := UrlTransform(args.Url)
	if keyErr != nil {
		log.Errorln(keyErr, args)
		return keyErr
	}

	payload := []byte(args.Body)
	written, writeErr := self.Gatekeeper.Write(args.Url, urlKey, payload)
	if writeErr != nil {
		log.Errorln(writeErr, args)
		return writeErr
	}

	*result = FindResult{Val: &written}
	return nil
}
Example #4
0
// Resolve looks up args.Host through the wrapped Resolver and stores the
// returned slice in *result. A lookup error is logged with the request
// arguments and returned; *result is left untouched in that case.
func (self *ResolverServer) Resolve(args *Args, result *[]string) error {
	resolved, resolveErr := self.Resolver.Resolve(args.Host)
	if resolveErr == nil {
		*result = resolved
		return nil
	}

	log.Errorln(resolveErr, args)
	return resolveErr
}
Example #5
0
// DownloadAll passes args.Urls to the wrapped Downloader and stores the
// returned slice in *result. A download error is logged with the request
// arguments and returned; *result is left untouched in that case.
func (self *DownloaderServer) DownloadAll(args *ArgsAll, result *[]string) error {
	downloaded, dlErr := self.Downloader.DownloadAll(args.Urls)
	if dlErr == nil {
		*result = downloaded
		return nil
	}

	log.Errorln(dlErr, args)
	return dlErr
}
Example #6
0
// main starts the caregiver service. It parses the command-line flags, builds
// the caregiver, registers it on an RPC server, starts the caregiver's work
// loop in a background goroutine and then serves requests on the configured
// port.
func main() {
	var help = flag.Bool("help", false, "print help")
	var port = flag.Int("port", -1, "port to listen")
	var dlerArrd = flag.String("dl", "", "downloader address")
	var dnsAddr = flag.String("dns", "", "dns resolver address")
	var defaultMaxCount = flag.Int("maxcount", 1, "default maximum count urls for specific host to download at once")
	var defaultTimeout = flag.Int("timeout", 0, "default timeout between downloads for specific host (in ms)")
	var pullTimeout = flag.Int("pull-timeout", 100, "pull check timeout (in ms)")
	var workTimeout = flag.Int("work-timeout", 1000, "work loop timeout (in ms)")
	var gracefulRestart = graceful.SetFlag()
	flag.Parse()

	// The port and the downloader address are mandatory; the DNS resolver
	// address may stay empty. A negative max count is rejected because the
	// uint conversion below would silently wrap it around to a huge value.
	if *help || *port == -1 || *dlerArrd == "" || *defaultMaxCount < 0 {
		flag.PrintDefaults()
		return
	}

	// All timeout flags are expressed in milliseconds.
	ct, err := caregiver.NewCaregiver(
		*dlerArrd,
		*dnsAddr,
		uint(*defaultMaxCount),
		time.Duration(*defaultTimeout)*time.Millisecond,
		time.Duration(*pullTimeout)*time.Millisecond,
		time.Duration(*workTimeout)*time.Millisecond,
	)
	if err != nil {
		log.Fatal(err)
	}

	srv := rpc.NewServer()
	// A failed registration would leave the server without any methods, so
	// treat it as fatal instead of ignoring it.
	if err := srv.Register(&caregiver.CaregiverServer{ct}); err != nil {
		log.Fatal(err)
	}

	server := gjsonrpc.NewServer(srv)
	graceful.SetSighup(server)

	// Keep restarting the work loop while it fails, pausing for the work
	// timeout between attempts; stop once Start returns without an error.
	go func() {
		for {
			if err := ct.Start(); err != nil {
				log.Errorln(err)
				time.Sleep(ct.WorkTimeout)
				continue
			}
			break
		}
	}()

	if err := server.ListenAndServe(":"+strconv.Itoa(*port), *gracefulRestart); err != nil {
		log.Fatal(errors.NewErr(err))
	}

	// Reached only when ListenAndServe returned without an error.
	if err := graceful.Restart(server); err != nil {
		log.Fatal(err)
	}
}
Example #7
0
// Read derives the storage key for args.Url with UrlTransform, looks the key
// up in the wrapped Gatekeeper and, when an entry exists, reads its data.
// On success *result is replaced by a ReadResult carrying pointers to the
// found entry and to the data. A missing entry is not an error: the method
// returns nil and leaves *result untouched. Errors are logged with the
// request arguments and returned unchanged.
func (self *GatekeeperServer) Read(args *FindArgs, result *ReadResult) error {
	urlKey, keyErr := UrlTransform(args.Url)
	if keyErr != nil {
		log.Errorln(keyErr, args)
		return keyErr
	}

	entry, found := self.Gatekeeper.Find(urlKey)
	if !found {
		return nil
	}

	content, readErr := self.Gatekeeper.Read(entry)
	if readErr != nil {
		log.Errorln(readErr, args)
		return readErr
	}

	located := FindResult{Val: &entry}
	*result = ReadResult{located, &content}
	return nil
}
Example #8
0
// CreateErrorHandler adapts an error-returning handler to http.HandlerFunc.
// When the wrapped handler returns a non-nil error, the error is logged and
// its text is sent to the client: with status 400 if the error is a
// ClientErrorT, with status 500 otherwise. A nil error produces no extra
// output.
func CreateErrorHandler(handler ErrorHandlerT) http.HandlerFunc {
	return func(w http.ResponseWriter, r *http.Request) {
		err := handler(w, r)
		if err == nil {
			return
		}
		log.Errorln(err)

		status := http.StatusInternalServerError
		if _, isClientErr := err.(ClientErrorT); isClientErr {
			status = http.StatusBadRequest
		}
		http.Error(w, err.Error(), status)
	}
}
Example #9
0
// Find derives the storage key for args.Url with UrlTransform and looks it up
// in the wrapped Gatekeeper. When an entry exists, *result is replaced by a
// FindResult pointing at it; otherwise *result is left untouched. Only a key
// transformation failure is reported as an error (after being logged with
// the request arguments).
func (self *GatekeeperServer) Find(args *FindArgs, result *FindResult) error {
	urlKey, keyErr := UrlTransform(args.Url)
	if keyErr != nil {
		log.Errorln(keyErr, args)
		return keyErr
	}

	entry, found := self.Gatekeeper.Find(urlKey)
	if !found {
		return nil
	}

	*result = FindResult{Val: &entry}
	return nil
}
Example #10
0
// Resolve returns the addresses known for host.
//
// A cache entry that has not yet expired is served directly. Otherwise the
// host is looked up with net.LookupIP; a successful lookup replaces the cache
// entry (valid for self.cacheTime) and is returned. If the lookup fails but
// an expired entry exists, the error is only logged and the stale entry is
// served; without any entry the wrapped lookup error is returned.
func (self *Resolver) Resolve(host string) ([]string, error) {
	log.Printf("Resolver.Resolve(%s)\n", host)

	self.mutex.RLock()
	entry, cached := self.cache[host]
	self.mutex.RUnlock()

	var addrs []string

	// Fast path: the cached entry is still valid.
	if cached && time.Now().Before(entry.end) {
		self.load(&entry, &addrs)
		log.Printf("Resolver.Resolve(%s) OK (cache)!\n", host)
		return addrs, nil
	}

	ips, lookupErr := net.LookupIP(host)
	if lookupErr != nil {
		lookupErr = errors.NewErr(lookupErr)
		if !cached {
			return nil, lookupErr
		}

		// Fall back to the expired entry rather than failing the request.
		self.load(&entry, &addrs)
		log.Errorln(lookupErr)
		log.Printf("Resolver.Resolve(%s) OK (cache, but error)!\n", host)
		return addrs, nil
	}

	// Every returned address is kept; an IPv4-only filter used to sit here
	// and is currently disabled.
	fresh := dataT{
		ips: append(make([]net.IP, 0, len(ips)), ips...),
		end: time.Now().Add(self.cacheTime),
	}

	self.mutex.Lock()
	self.cache[host] = fresh
	self.mutex.Unlock()

	self.load(&fresh, &addrs)
	log.Printf("Resolver.Resolve(%s) OK!\n", host)
	return addrs, nil
}
Example #11
0
// main starts the TCP balancer. It parses the command-line flags, builds the
// routing policy for the configured backend urls and serves incoming
// connections on the configured port, handing each one to the balancer.
func main() {
	var help = flag.Bool("help", false, "print help")
	var port = flag.Int("port", -1, "port to listen")
	var rout = flag.String("rout", "random", "routing policy: random, roundrobin")
	var urls Urls
	flag.Var(&urls, "url", "backend url")
	var gracefulRestart = graceful.SetFlag()
	flag.Parse()

	// The port and at least one backend url are mandatory.
	if *help || *port == -1 || urls == nil {
		flag.PrintDefaults()
		return
	}

	router, err := balanser.NewChooser(*rout, urls)
	if err != nil {
		// Report why the policy was rejected instead of printing the usage
		// text alone.
		log.Errorln(err)
		flag.PrintDefaults()
		return
	}

	// Named tcpBalanser so that the balanser package is not shadowed.
	tcpBalanser := btcp.NewBalanser(router, urls)
	server := grpc.NewServer(rpc.NewServer(), func(srv *rpc.Server, conn io.ReadWriteCloser) {
		// A checked assertion keeps an unexpected connection type from
		// panicking inside the connection handler.
		tcpConn, ok := conn.(*net.TCPConn)
		if !ok {
			log.Errorln("balanser: connection is not a *net.TCPConn, dropping it")
			// NOTE(review): assumes the handler owns the connection on this
			// path; confirm against grpc.NewServer.
			if conn != nil {
				if err := conn.Close(); err != nil {
					log.Errorln(err)
				}
			}
			return
		}
		if err := tcpBalanser.Request(tcpConn); err != nil {
			log.Errorln(err)
		}
	})

	graceful.SetSighup(server)

	if err := server.ListenAndServe(":"+strconv.Itoa(*port), *gracefulRestart); err != nil {
		log.Fatal(errors.NewErr(err))
	}

	// Reached only when ListenAndServe returned without an error.
	if err := graceful.Restart(server); err != nil {
		log.Fatal(err)
	}
}
Example #12
0
// Start runs the caregiver's download loop.
//
// Each iteration selects the hosts that have queued urls and whose timeout
// has expired, dequeues up to maxCount urls per host, downloads them through
// self.downloader and stores every non-empty document in self.data keyed by
// its url. When no host is ready the loop sleeps for self.WorkTimeout.
//
// The loop has no exit path, so the method does not return; the error result
// only becomes relevant if the disabled DNS step is re-enabled.
func (self *Caregiver) Start() error {
	log.Printf("Caregiver.Start()\n")
	for {
		// Select the hosts that may be downloaded from: they have queued
		// urls and their timeout has expired.
		data := map[string]*hostData{}
		self.mutex.Lock()
		now := time.Now()
		for k, v := range self.hosts {
			if v.urls.Len() != 0 && v.end.Before(now) {
				data[k] = v
			}
		}
		self.mutex.Unlock()
		if len(data) == 0 {
			// Nothing is ready yet: wait before polling the hosts again.
			time.Sleep(self.WorkTimeout)
			continue
		}

		log.Printf("Caregiver.Start(): start downloading\n")

		// Resolve DNS. This step is disabled for now (see the TODO below),
		// so ips stays empty and every url is built from the host name.
		ips := map[string][]string{}
		// hosts := make([]string, 0, len(data))
		// for k := range data {
		// 	hosts = append(hosts, k)
		// }
		// if self.dns != nil {
		// 	var err error
		// 	ips, err = self.dns.ResolveAll(hosts)
		// 	if err != nil {
		// 		return err
		// 	}
		// }

		// Ready to download: turn the queued entries into absolute urls.
		urls := []string{}
		for k, v := range data {
			/*
				TODO: requests by IP do not work for some reason.
				I suspect the server reads RequestURL and we will have to
				implement our own HTTP stack to make it work.
			*/

			ip := k
			if addrs, ok := ips[k]; ok && len(addrs) != 0 {
				ip = addrs[0]
				// An address containing ':' is treated as IPv6 and needs
				// brackets plus an explicit port.
				if strings.Contains(ip, ":") {
					ip = "[" + ip + "]:80"
				}
			}
			host := "http://" + ip
			// NOTE(review): the queue is modified here without self.mutex
			// while its length is read under the lock above; confirm that
			// the queue synchronizes itself.
			us := v.urls.DequeueN(v.maxCount)
			for i := range us {
				if len(us[i]) != 0 {
					us[i] = host + "/" + us[i]
				} else {
					us[i] = host
				}
			}
			urls = append(urls, us...)
		}

		log.Printf("Caregiver.Start(): collected urls %#v\n", urls)
		docs, err := self.downloader.DownloadAll(urls)
		if err != nil {
			log.Errorln(err)
		}

		// Schedule the next download time of every processed host. The end
		// field is read under self.mutex when hosts are selected, so it has
		// to be written under the same lock.
		now = time.Now()
		self.mutex.Lock()
		for _, v := range data {
			v.end = now.Add(v.timeout)
		}
		self.mutex.Unlock()

		// Store the downloaded documents; an empty document is reported as
		// a failed download.
		for i, v := range docs {
			if v != "" {
				self.dataMutex.Lock()
				self.data[urls[i]] = v
				self.dataMutex.Unlock()
			} else {
				log.Errorln("Couldn't download url "+urls[i]+",", v)
			}
		}

		log.Printf("Caregiver.Start(): downloaded urls %#v\n", urls)
	}
}