/* Conn.performHandshake Responsible for performing handshake with service */ func (c *Conn) performHandshake() (err error) { var sh skynet.ServiceHandshake decoder := bsonrpc.NewDecoder(c.conn) err = decoder.Decode(&sh) if err != nil { log.Println(log.ERROR, "Failed to decode ServiceHandshake", err) c.conn.Close() return HandshakeFailed } ch := skynet.ClientHandshake{} encoder := bsonrpc.NewEncoder(c.conn) err = encoder.Encode(ch) if err != nil { log.Println(log.ERROR, "Failed to encode ClientHandshake", err) c.conn.Close() return HandshakeFailed } if !sh.Registered { return ServiceUnregistered } c.rpcClient = bsonrpc.NewClient(c.conn) c.clientID = sh.ClientID return }
func (s *SkynetDaemon) StopAllSubServices(requestInfo *skynet.RequestInfo, in daemon.StopAllSubServicesRequest, out *daemon.StopAllSubServicesResponse) (err error) { var uuids []string s.serviceLock.Lock() for uuid := range s.Services { uuids = append(uuids, uuid) } s.serviceLock.Unlock() out.Stops = make([]daemon.StopSubServiceResponse, len(uuids)) for i, uuid := range uuids { log.Println(log.TRACE, "Stopping "+uuid) err = s.StopSubService(requestInfo, daemon.StopSubServiceRequest{UUID: uuid}, &out.Stops[i]) if err != nil { log.Println(log.ERROR, "Failed to stop subservice "+uuid, err) return } if out.Stops[i].Ok { out.Count++ } } s.saveState() return }
// TODO: This should be moved out so that it's run asynchronously // it should also use a buffered channel so that if a save is already queued it only saves once func (s *SkynetDaemon) writeStateFile() (err error) { err = s.stateFile.Truncate(0) if err != nil { return } _, err = s.stateFile.Seek(0, 0) if err != nil { return } var b []byte b, err = json.MarshalIndent(s.Services, "", "\t") if err != nil { log.Println(log.ERROR, "Failed to marshall daemon state") return } _, err = s.stateFile.Write(b) if err != nil { log.Println(log.ERROR, "Failed to save daemon state") } return }
// this function is the goroutine that owns this service - all thread-sensitive data needs to // be manipulated only through here. func (s *Service) mux() { loop: for { select { case conn := <-s.connectionChan: clientID := config.NewUUID() s.clientMutex.Lock() s.ClientInfo[clientID] = ClientInfo{ Address: conn.RemoteAddr(), } s.clientMutex.Unlock() // send the server handshake sh := skynet.ServiceHandshake{ Registered: s.Registered, ClientID: clientID, } encoder := bsonrpc.NewEncoder(conn) err := encoder.Encode(sh) if err != nil { log.Println(log.ERROR, "Failed to encode server handshake", err.Error()) continue } if !s.Registered { conn.Close() continue } // read the client handshake var ch skynet.ClientHandshake decoder := bsonrpc.NewDecoder(conn) err = decoder.Decode(&ch) if err != nil { log.Println(log.ERROR, "Error calling bsonrpc.NewDecoder: "+err.Error()) continue } // here do stuff with the client handshake go func() { s.RPCServ.ServeCodec(bsonrpc.NewServerCodec(conn)) }() case register := <-s.registeredChan: if register { s.register() } else { s.unregister() } case <-s.shutdownChan: s.shutdown() case _ = <-s.doneChan: break loop } } }
func getGiveupTimeout(service, version string) time.Duration { if d, err := config.String(service, version, "client.timeout.total"); err == nil { if timeout, err := time.ParseDuration(d); err == nil { log.Println(log.TRACE, fmt.Sprintf("Using custom giveup duration %q for %q %q", timeout.String(), service, version)) return timeout } log.Println(log.ERROR, "Failed to parse client.timeout.total", err) } return config.DefaultRetryDuration }
func (sd *SkynetDaemon) Started(s *service.Service) { err := sd.cleanupHost(s.ServiceInfo.UUID) if err != nil { log.Println(log.ERROR, "Error cleaning up host", err) } err = sd.restoreState() if err != nil { log.Println(log.ERROR, "Error restoring state", err) } }
// Wait for existing requests to complete and shutdown service
func (s *Service) shutdown() {
	// Idempotent: repeat calls are no-ops once a shutdown is underway.
	if s.shuttingDown {
		return
	}

	s.shuttingDown = true

	s.doneGroup.Add(1)

	// Stop accepting new connections; the accept loop observes
	// shuttingDown and exits after its next Accept returns.
	s.rpcListener.Close()

	// Signal mux() to leave its loop (doneChan is buffered — see Start).
	s.doneChan <- true

	// Block until all in-flight requests have drained.
	s.activeRequests.Wait()

	err := skynet.GetServiceManager().Remove(*s.ServiceInfo)
	if err != nil {
		log.Println(log.ERROR, "Failed to remove service: "+err.Error())
	}

	skynet.GetServiceManager().Shutdown()

	s.Delegate.Stopped(s) // Call user defined callback

	s.doneGroup.Done()
}
func (s *Service) listen(addr skynet.BindAddr, bindWait *sync.WaitGroup) { var err error s.rpcListener, err = addr.Listen() if err != nil { panic(err) } log.Printf(log.INFO, "%+v\n", ServiceListening{ Addr: &addr, ServiceInfo: s.ServiceInfo, }) // We may have changed port due to conflict, ensure config has the correct port now a, _ := skynet.BindAddrFromString(addr.String()) s.ServiceAddr.IPAddress = a.IPAddress s.ServiceAddr.Port = a.Port bindWait.Done() for { conn, err := s.rpcListener.AcceptTCP() if s.shuttingDown { break } if err != nil && !s.shuttingDown { log.Println(log.ERROR, "AcceptTCP failed", err) continue } s.connectionChan <- conn } }
func (c *ServiceClient) attemptSend(timeout chan bool, attempts chan sendAttempt, ri *skynet.RequestInfo, fn string, in interface{}) { // first find an available instance var r pools.Resource var err error var sp *servicePool for r == nil { if len(c.instances) < 1 { attempts <- sendAttempt{err: errors.New("No instances found")} return } sp = <-c.servicePool log.Println(log.TRACE, "Sending request to: "+sp.service.UUID) // then, get a connection to that instance r, err = sp.pool.Acquire() defer sp.pool.Release(r) if err != nil { if r != nil { r.Close() } failed := FailedConnection{err} log.Printf(log.ERROR, "%T: %+v", failed, failed) c.instanceFailureChan <- *sp.service } } if err != nil { log.Printf(log.ERROR, "Error: %v", err) attempts <- sendAttempt{err: err} return } sr := r.(ServiceResource) result, serviceErr, err := c.sendToInstance(sr, ri, fn, in) if err != nil { // some communication error happened, shut down this connection and remove it from the pool failed := FailedConnection{err} log.Printf(log.ERROR, "%T: %+v", failed, failed) c.instanceFailureChan <- *sp.service sr.Close() return } attempts <- sendAttempt{ result: result, err: serviceErr, } }
func (s *Service) serveAdminRequests() { rId := os.Stderr.Fd() + 2 wId := os.Stderr.Fd() + 3 pipeReader := os.NewFile(uintptr(rId), "") pipeWriter := os.NewFile(uintptr(wId), "") s.pipe = daemon.NewPipe(pipeReader, pipeWriter) b := make([]byte, daemon.MAX_PIPE_BYTES) for { n, err := s.pipe.Read(b) if err != nil { if err != io.EOF { log.Printf(log.ERROR, "Error reading from admin pipe "+err.Error()) } else { // We received EOF, ensure we shutdown (if daemon died we could be orphaned) s.Shutdown() } return } cmd := string(b[:n]) log.Println(log.TRACE, "Received "+cmd+" from daemon") switch cmd { case "SHUTDOWN": s.Shutdown() s.pipe.Write([]byte("ACK")) break case "REGISTER": s.Register() s.pipe.Write([]byte("ACK")) case "UNREGISTER": s.Unregister() s.pipe.Write([]byte("ACK")) case "LOG DEBUG", "LOG TRACE", "LOG INFO", "LOG WARN", "LOG ERROR", "LOG FATAL", "LOG PANIC": parts := strings.Split(cmd, " ") log.SetLogLevel(log.LevelFromString(parts[1])) log.Println(log.INFO, "Setting log level to "+parts[1]) s.pipe.Write([]byte("ACK")) } } }
func init() { flagset := flag.NewFlagSet("config", flag.ContinueOnError) flagset.StringVar(&configFile, "config", "", "Config File") flagset.StringVar(&uuid, "uuid", "", "uuid") args, _ := SplitFlagsetFromArgs(flagset, os.Args[1:]) flagset.Parse(args) // Ensure we have a UUID if uuid == "" { uuid = NewUUID() } if configFile == "" { for _, f := range defaultConfigFiles { if _, err := os.Stat(f); err == nil { configFile = f break } } } if configFile == "" { log.Println(log.ERROR, "Failed to find config file") conf = config.NewDefault() return } if _, err := os.Stat(configFile); os.IsNotExist(err) { log.Println(log.ERROR, "Config file does not exist", err) conf = config.NewDefault() return } var err error if conf, err = config.ReadDefault(configFile); err != nil { conf = config.NewDefault() log.Fatal(err) } // Set default log level from config, this can be overriden at the service level when the service is created if l, err := conf.RawStringDefault("log.level"); err == nil { log.SetLogLevel(log.LevelFromString(l)) } }
func getIdleTimeout(s skynet.ServiceInfo) time.Duration { if d, err := config.String(s.Name, s.Version, "client.timeout.idle"); err == nil { if timeout, err := time.ParseDuration(d); err != nil { return timeout } log.Println(log.ERROR, "Failed to parse client.timeout.idle", err) } return config.DefaultIdleTimeout }
func (c *ServiceClient) send(retry, giveup time.Duration, ri *skynet.RequestInfo, fn string, in interface{}, out interface{}) (err error) { if ri == nil { ri = c.NewRequestInfo() } attempts := make(chan sendAttempt) var retryTicker <-chan time.Time if retry > 0 { retryTicker = time.Tick(retry) } var timeoutTimer <-chan time.Time if giveup > 0 { timeoutTimer = time.NewTimer(giveup).C } attemptCount := 1 go c.attemptSend(retry, attempts, ri, fn, in, out) for { select { case <-retryTicker: attemptCount++ ri.RetryCount++ go c.attemptSend(retry, attempts, ri, fn, in, out) case <-timeoutTimer: err = RequestTimeout return case attempt := <-attempts: if attempt.err != nil { log.Println(log.ERROR, "Attempt Error: ", attempt.err) // If there is no retry timer we need to exit as retries were disabled if retryTicker == nil { return err } continue } // copy into the caller's value v := reflect.Indirect(reflect.ValueOf(out)) v.Set(reflect.Indirect(reflect.ValueOf(attempt.result))) return } } }
// TODO: This is a short term solution to keeping the pools up to date with zookeeper, and load balancing across them // to be replaced by full implementation later, with proper load balancing based off host metrics, and region/host priorities func (c *ServiceClient) managePools() { var failedInstances = make(map[string]int) for { for _, p := range c.instances { select { case <-c.updateChan: var currentInstances = make(map[string]*skynet.ServiceInfo) instances, err := skynet.GetServiceManager().ListInstances(c.criteria) if err == nil && len(instances) > 0 { for _, instance := range instances { if !instance.Registered { continue } key := getInstanceKey(&instance) currentInstances[key] = &instance c.addInstance(instance) } } // Remove old instances for key, _ := range c.instances { if i, ok := currentInstances[key]; ok { c.removeInstance(*i) } } break case i := <-c.instanceFailureChan: key := getInstanceKey(&i) if _, ok := failedInstances[key]; !ok { failedInstances[key] = 1 } failedInstances[key]++ if failedInstances[key] >= MaxFailureCount { log.Println(log.TRACE, "Max failure count reached for instance: ", i.UUID) c.removeInstance(i) delete(failedInstances, key) } case c.servicePool <- p: } } } }
func (ss *SubService) sendAdminCommand(cmd string) bool { log.Println(log.TRACE, "Writing to admin pipe: "+cmd) _, err := ss.pipe.Write([]byte(cmd)) if err != nil { log.Println(log.ERROR, "Failed to write to admin pipe", err) return false } b := make([]byte, daemon.MAX_PIPE_BYTES) log.Println(log.TRACE, "Reading from admin pipe") n, err := ss.pipe.Read(b) if err != nil && err != io.EOF { log.Println(log.ERROR, "Failed to read from admin pipe", err) return false } if bytes.Equal(b[:n], []byte("ACK")) { return true } return false }
func (s *Service) register() { // this version must be run from the mux() goroutine if s.Registered { return } err := skynet.GetServiceManager().Register(s.ServiceInfo.UUID) if err != nil { log.Println(log.ERROR, "Failed to register service: "+err.Error()) } s.Registered = true log.Printf(log.INFO, "%+v\n", ServiceRegistered{s.ServiceInfo}) s.Delegate.Registered(s) // Call user defined callback }
// Starts your skynet service, including binding to ports. Optionally register for requests at the same time. Returns a sync.WaitGroup that will block until all requests have finished
func (s *Service) Start() (done *sync.WaitGroup) {
	bindWait := &sync.WaitGroup{}
	bindWait.Add(1)

	go s.listen(s.ServiceAddr, bindWait)

	// Watch signals for shutdown
	c := make(chan os.Signal, 1)
	go watchSignals(c, s)

	// Buffered so shutdown() can signal mux() without blocking on it.
	s.doneChan = make(chan bool, 1)

	// We must block here, we don't want to register, until we've actually bound to an ip:port
	bindWait.Wait()

	s.doneGroup = &sync.WaitGroup{}
	s.doneGroup.Add(1)

	// mux() owns all thread-sensitive service state; doneGroup is
	// released when it exits.
	go func() {
		s.mux()
		s.doneGroup.Done()
	}()
	done = s.doneGroup

	// Config may override the caller's registration choice.
	if r, err := config.Bool(s.Name, s.Version, "service.register"); err == nil {
		s.Registered = r
	}

	err := skynet.GetServiceManager().Add(*s.ServiceInfo)
	if err != nil {
		log.Println(log.ERROR, "Failed to add service: "+err.Error())
	}

	if s.Registered {
		s.Register()
	}

	go s.Delegate.Started(s) // Call user defined callback

	if s.ServiceInfo.Registered {
		go s.Delegate.Registered(s) // Call user defined callback
	}

	return
}
func NewServiceInfo(name, version string) (si *ServiceInfo) { // TODO: we need to grab Host/Region/ServiceAddr from config si = &ServiceInfo{ Name: name, Version: version, UUID: config.UUID(), } var host string var minPort, maxPort int if r, err := config.String(name, version, "region"); err == nil { si.Region = r } else { si.Region = config.DefaultRegion } if h, err := config.String(name, version, "host"); err == nil { host = h } else { host = config.DefaultHost } if p, err := config.Int(name, version, "service.port.min"); err == nil { minPort = p } else { minPort = config.DefaultMinPort } if p, err := config.Int(name, version, "service.port.max"); err == nil { maxPort = p } else { maxPort = config.DefaultMaxPort } log.Println(log.TRACE, host, minPort, maxPort) si.ServiceAddr = BindAddr{IPAddress: host, Port: minPort, MaxPort: maxPort} return si }
// Daemon will run and maintain skynet services. // // Daemon will run the "SkynetDeployment" service, which can be used // to remotely spawn new services on the host. func main() { config, _ := skynet.GetServiceConfig() log.Println(log.INFO, "Connecting to ZooKeeper: ", os.Getenv("SKYNET_ZOOKEEPER")) skynet.SetServiceManager(zkmanager.NewZookeeperServiceManager(os.Getenv("SKYNET_ZOOKEEPER"), 1*time.Second)) config.Name = "SkynetDaemon" config.Version = "2" deployment := NewSkynetDaemon() s := service.CreateService(deployment, config) deployment.Service = s // handle panic so that we remove ourselves from the pool in case of catastrophic failure defer func() { s.Shutdown() deployment.closeStateFile() if err := recover(); err != nil { e := err.(error) log.Fatal("Unrecovered error occured: " + e.Error()) } }() // Collect Host metrics statTicker := time.Tick((5 * time.Second)) go func() { for _ = range statTicker { deployment.updateHostStats(config.ServiceAddr.IPAddress) } }() // If we pass false here service will not be Registered // we could do other work/tasks by implementing the Started method and calling Register() when we're ready s.Start().Wait() }
func (sa *Admin) Stop(in skynet.StopRequest, out *skynet.StopResponse) (err error) { log.Println(log.TRACE, "Got RPC admin command Stop") sa.service.Shutdown() return }
func (sa *Admin) Unregister(in skynet.UnregisterRequest, out *skynet.UnregisterResponse) (err error) { log.Println(log.TRACE, "Got RPC admin command Unregister") sa.service.Unregister() return }
func (ss *SubService) Restart() bool { log.Println(log.INFO, "Restarting service intentially "+ss.UUID) return ss.sendAdminCommand("SHUTDOWN") }