func main() { var help = flag.Bool("help", false, "print help") var port = flag.Int("port", -1, "port to listen") var gkArrd = flag.String("gatekeeper", "", "gatekeeper address") var cgAddr = flag.String("caregiver", "", "caregiver address") var vint = flag.Int("interval", 1, "sleep interval") var pushCnt = flag.Int("push-cnt", 10, "urls to push to the caregiver at a time") var gracefulRestart = graceful.SetFlag() flag.Parse() if *help || *port == -1 || *gkArrd == "" || *cgAddr == "" { flag.PrintDefaults() return } sp, err := spider.NewSpider(*gkArrd, *cgAddr, time.Duration(*vint)*time.Second, uint(*pushCnt)) if err != nil { log.Fatal(err) } srv := rpc.NewServer() srv.Register(&spider.SpiderServer{sp}) server := gjsonrpc.NewServer(srv) graceful.SetSighup(server) go func() { for { if err := sp.RunPusher(); err != nil { log.Errorln(err) } } }() go func() { for { if err := sp.RunPuller(); err != nil { log.Errorln(err) } } }() if err := server.ListenAndServe(":"+strconv.Itoa(*port), *gracefulRestart); err != nil { log.Fatal(errors.NewErr(err)) } if err := graceful.Restart(server); err != nil { log.Fatal(err) } }
func (self *CaregiverServer) PushUrls(args *Args, result *struct{}) error { err := self.Caregiver.PushUrls(args.Urls) if err != nil { log.Errorln(err, args) } return err }
func (self *GatekeeperServer) Write(args *WriteArgs, result *FindResult) error { key, err := UrlTransform(args.Url) if err != nil { log.Errorln(err, args) return err } r, err := self.Gatekeeper.Write(args.Url, key, []byte(args.Body)) if err != nil { log.Errorln(err, args) return err } *result = FindResult{ Val: &r, } return nil }
func (self *ResolverServer) Resolve(args *Args, result *[]string) error { r, err := self.Resolver.Resolve(args.Host) if err != nil { log.Errorln(err, args) return err } *result = r return nil }
func (self *DownloaderServer) DownloadAll(args *ArgsAll, result *[]string) error { r, err := self.Downloader.DownloadAll(args.Urls) if err != nil { log.Errorln(err, args) return err } *result = r return nil }
func main() { var help = flag.Bool("help", false, "print help") var port = flag.Int("port", -1, "port to listen") var dlerArrd = flag.String("dl", "", "downloader address") var dnsAddr = flag.String("dns", "", "dns resolver address") var defaultMaxCount = flag.Int("maxcount", 1, "default maximum count urls for specific host to download at once") var defaultTimeout = flag.Int("timeout", 0, "default timeout between downloads for specific host (im ms)") var pullTimeout = flag.Int("pull-timeout", 100, "pull check timeout (im ms)") var workTimeout = flag.Int("work-timeout", 1000, "pull check timeout (im ms)") var gracefulRestart = graceful.SetFlag() flag.Parse() if *help || *port == -1 || *dlerArrd == "" { flag.PrintDefaults() return } ct, err := caregiver.NewCaregiver( *dlerArrd, *dnsAddr, uint(*defaultMaxCount), time.Duration(*defaultTimeout)*time.Millisecond, time.Duration(*pullTimeout)*time.Millisecond, time.Duration(*workTimeout)*time.Millisecond, ) if err != nil { log.Fatal(err) } srv := rpc.NewServer() srv.Register(&caregiver.CaregiverServer{ct}) server := gjsonrpc.NewServer(srv) graceful.SetSighup(server) go func() { for { if err := ct.Start(); err != nil { log.Errorln(err) time.Sleep(ct.WorkTimeout) continue } break } }() if err := server.ListenAndServe(":"+strconv.Itoa(*port), *gracefulRestart); err != nil { log.Fatal(errors.NewErr(err)) } if err := graceful.Restart(server); err != nil { log.Fatal(err) } }
func (self *GatekeeperServer) Read(args *FindArgs, result *ReadResult) error { key, err := UrlTransform(args.Url) if err != nil { log.Errorln(err, args) return err } r, ok := self.Gatekeeper.Find(key) if !ok { return nil } data, err := self.Gatekeeper.Read(r) if err != nil { log.Errorln(err, args) return err } *result = ReadResult{FindResult{Val: &r}, &data} return nil }
func CreateErrorHandler(handler ErrorHandlerT) http.HandlerFunc { return func(w http.ResponseWriter, r *http.Request) { if err := handler(w, r); err != nil { log.Errorln(err) if _, ok := err.(ClientErrorT); ok { http.Error(w, err.Error(), http.StatusBadRequest) } else { http.Error(w, err.Error(), http.StatusInternalServerError) } } } }
func (self *GatekeeperServer) Find(args *FindArgs, result *FindResult) error { key, err := UrlTransform(args.Url) if err != nil { log.Errorln(err, args) return err } if r, ok := self.Gatekeeper.Find(key); ok { *result = FindResult{ Val: &r, } } return nil }
func (self *Resolver) Resolve(host string) ([]string, error) { log.Printf("Resolver.Resolve(%s)\n", host) self.mutex.RLock() r, hacheHit := self.cache[host] self.mutex.RUnlock() var res []string if hacheHit && time.Now().Before(r.end) { self.load(&r, &res) log.Printf("Resolver.Resolve(%s) OK (cache)!\n", host) return res, nil } val, err := net.LookupIP(host) if err != nil { err = errors.NewErr(err) if hacheHit { self.load(&r, &res) log.Errorln(err) log.Printf("Resolver.Resolve(%s) OK (cache, but error)!\n", host) return res, nil } return nil, err } d := dataT{ ips: make([]net.IP, 0, len(val)), end: time.Now().Add(self.cacheTime), } for _, v := range val { // if ip4 := v.To4(); len(ip4) == net.IPv4len { d.ips = append(d.ips, v) // } } self.mutex.Lock() self.cache[host] = d self.mutex.Unlock() self.load(&d, &res) log.Printf("Resolver.Resolve(%s) OK!\n", host) return res, nil }
func main() { var help = flag.Bool("help", false, "print help") var port = flag.Int("port", -1, "port to listen") var rout = flag.String("rout", "random", "routing policy: random, roundrobin") var urls Urls flag.Var(&urls, "url", "backend url") var gracefulRestart = graceful.SetFlag() flag.Parse() if *help || *port == -1 || urls == nil { flag.PrintDefaults() return } router, err := balanser.NewChooser(*rout, urls) if err != nil { flag.PrintDefaults() return } balanser := btcp.NewBalanser(router, urls) server := grpc.NewServer(rpc.NewServer(), func(srv *rpc.Server, conn io.ReadWriteCloser) { if err := balanser.Request(conn.(*net.TCPConn)); err != nil { log.Errorln(err) } }) graceful.SetSighup(server) if err := server.ListenAndServe(":"+strconv.Itoa(*port), *gracefulRestart); err != nil { log.Fatal(errors.NewErr(err)) } if err := graceful.Restart(server); err != nil { log.Fatal(err) } }
func (self *Caregiver) Start() error { log.Printf("Caregiver.Start()\n") for { // выделим хосты, которын можно по таймауту качать data := map[string]*hostData{} self.mutex.Lock() now := time.Now() for k, v := range self.hosts { if v.urls.Len() != 0 && v.end.Before(now) { data[k] = v } } self.mutex.Unlock() if len(data) == 0 { time.Sleep(self.WorkTimeout) continue } log.Printf("Caregiver.Start(): start downloading\n") // резолвим dns hosts := make([]string, 0, len(data)) for k, _ := range data { hosts = append(hosts, k) } var err error ips := map[string][]string{} // if self.dns != nil { // ips, err = self.dns.ResolveAll(hosts) // if err != nil { // return err // } // } now = time.Now() // можно качать! urls := []string{} for k, v := range data { /* TODO: запросы по ip почему-то не работаютю. Подозреваю, что сервер читает RequestURL и нужно будет реализовать свой стек http, чтобы все заработало. */ ip := k if v, ok := ips[k]; ok && len(v) != 0 { ip = v[0] if strings.Contains(ip, ":") { ip = "[" + ip + "]:80" } } host := "http://" + ip us := v.urls.DequeueN(v.maxCount) for i := 0; i < len(us); i += 1 { if len(us[i]) != 0 { us[i] = host + "/" + us[i] } else { us[i] = host } } urls = append(urls, us...) } log.Printf("Caregiver.Start(): collected urls %#v\n", urls) docs, err := self.downloader.DownloadAll(urls) if err != nil { log.Errorln(err) } now = time.Now() for _, v := range data { v.end = now.Add(v.timeout) } for i, v := range docs { if v != "" { self.dataMutex.Lock() self.data[urls[i]] = v self.dataMutex.Unlock() } else { log.Errorln("Couldn't download url "+urls[i]+",", v) } } log.Printf("Caregiver.Start(): downloaded urls %#v\n", urls) } }