func (pc *PathCache) Start() (notifyChan chan PathCacheNotification, err error) { pc.notifyChan = make(chan PathCacheNotification, 10) pc.stoppedChan = make(chan bool, 1) pc.startup = sync.WaitGroup{} go pc.mux() err = pc.watch() if err != nil { // We expect this to occasionally happen due to timing if err != zk.ErrNoNode { log.Println(log.ERROR, err) } pc.Stop() return } err = pc.watchChildren() if err != nil { // We expect this to occasionally happen due to timing if err != zk.ErrNoNode { log.Println(log.ERROR, err) } pc.Stop() } pc.startup.Wait() return pc.notifyChan, err }
func (s *SkynetDaemon) StopAllSubServices(requestInfo *skynet.RequestInfo, in daemon.StopAllSubServicesRequest, out *daemon.StopAllSubServicesResponse) (err error) { var uuids []string s.serviceLock.Lock() for uuid := range s.Services { uuids = append(uuids, uuid) } s.serviceLock.Unlock() out.Stops = make([]daemon.StopSubServiceResponse, len(uuids)) for i, uuid := range uuids { log.Println(log.TRACE, "Stopping "+uuid) err = s.StopSubService(requestInfo, daemon.StopSubServiceRequest{UUID: uuid}, &out.Stops[i]) if err != nil { log.Println(log.ERROR, "Failed to stop subservice "+uuid, err) return } if out.Stops[i].Ok { out.Count++ } } s.saveState() return }
// TODO: This should be moved out so that it's run asynchronously // it should also use a buffered channel so that if a save is already queued it only saves once func (s *SkynetDaemon) writeStateFile() (err error) { err = s.stateFile.Truncate(0) if err != nil { return } _, err = s.stateFile.Seek(0, 0) if err != nil { return } var b []byte b, err = json.MarshalIndent(s.Services, "", "\t") if err != nil { log.Println(log.ERROR, "Failed to marshall daemon state") return } _, err = s.stateFile.Write(b) if err != nil { log.Println(log.ERROR, "Failed to save daemon state") } return }
func (c *ServiceClient) send(retry, giveup time.Duration, ri *skynet.RequestInfo, fn string, in interface{}, out interface{}) (err error) { if ri == nil { ri = c.NewRequestInfo() } attempts := make(chan sendAttempt) var retryTicker <-chan time.Time retryChan := make(chan bool) if retry > 0 { retryTicker = time.Tick(retry) } var timeoutTimer <-chan time.Time if giveup > 0 { timeoutTimer = time.NewTimer(giveup).C } attemptCount := 1 go c.attemptSend(retry, attempts, ri, fn, in, out) for { select { case <-retryTicker: retryChan <- true case <-retryChan: attemptCount++ ri.RetryCount++ log.Println(log.TRACE, fmt.Sprintf("Sending Attempt# %d with RequestInfo %+v", attemptCount, ri)) go c.attemptSend(retry, attempts, ri, fn, in, out) case <-timeoutTimer: err = RequestTimeout return case attempt := <-attempts: if attempt.err != nil { log.Println(log.ERROR, "Attempt Error: ", attempt.err) // If there is no retry timer we need to exit as retries were disabled if retryTicker == nil { return err } else { // Don't wait for next retry tick retry now retryChan <- true } continue } // copy into the caller's value v := reflect.Indirect(reflect.ValueOf(out)) v.Set(reflect.Indirect(reflect.ValueOf(attempt.result))) return } } }
// this function is the goroutine that owns this service - all thread-sensitive data needs to // be manipulated only through here. func (s *Service) mux() { loop: for { select { case conn := <-s.connectionChan: clientID := config.NewUUID() s.clientMutex.Lock() s.ClientInfo[clientID] = ClientInfo{ Address: conn.RemoteAddr(), } s.clientMutex.Unlock() // send the server handshake sh := skynet.ServiceHandshake{ Registered: s.Registered, ClientID: clientID, Name: s.Name, } encoder := bsonrpc.NewEncoder(conn) err := encoder.Encode(sh) if err != nil { log.Println(log.ERROR, "Failed to encode server handshake", err.Error()) continue } if !s.Registered { log.Println(log.ERROR, "Connection attempted while unregistered. Closing connection") conn.Close() continue } // read the client handshake var ch skynet.ClientHandshake decoder := bsonrpc.NewDecoder(conn) err = decoder.Decode(&ch) if err != nil { log.Println(log.ERROR, "Error calling bsonrpc.NewDecoder: "+err.Error()) continue } // here do stuff with the client handshake go func() { s.RPCServ.ServeCodec(bsonrpc.NewServerCodec(conn)) }() case register := <-s.registeredChan: if register { s.register() } else { s.unregister() } case <-s.shutdownChan: s.shutdown() case _ = <-s.doneChan: break loop } } }
func (sc *scodec) ReadRequestHeader(rq *rpc.Request) (err error) { log.Println(log.TRACE, "RPC Server Entered: ReadRequestHeader") defer log.Println(log.TRACE, "RPC Server Leaving: ReadRequestHeader") err = sc.dec.Decode(rq) if err != nil && err != io.EOF { log.Println(log.ERROR, "RPC Server Error decoding request header: ", err) } return }
func (sc *scodec) ReadRequestBody(v interface{}) (err error) { log.Println(log.TRACE, "RPC Server Entered: ReadRequestBody") defer log.Println(log.TRACE, "RPC Server Leaving: ReadRequestBody") err = sc.dec.Decode(v) if err != nil { log.Println(log.ERROR, "RPC Server Error decoding request body: ", err) } return }
func (sc *scodec) Close() (err error) { log.Println(log.TRACE, "RPC Server Entered: Close") defer log.Println(log.TRACE, "RPC Server Leaving: Close") err = sc.conn.Close() if err != nil { log.Println(log.ERROR, "RPC Server Error closing connection: ", err) return } return }
func (cc *ccodec) ReadResponseHeader(res *rpc.Response) (err error) { log.Println(log.TRACE, "RPC Server Entered: ReadResponseHeader") defer log.Println(log.TRACE, "RPC Server Leaving: ReadResponseHeader") err = cc.dec.Decode(res) if err != nil { log.Println(log.ERROR, "RPC Client Error decoding response header: ", err) } return }
func (sd *SkynetDaemon) Started(s *service.Service) { err := sd.cleanupHost(s.ServiceInfo.UUID) if err != nil { log.Println(log.ERROR, "Error cleaning up host", err) } err = sd.restoreState() if err != nil { log.Println(log.ERROR, "Error restoring state", err) } }
func getGiveupTimeout(service, version string) time.Duration { if d, err := config.String(service, version, "client.timeout.total"); err == nil { if timeout, err := time.ParseDuration(d); err == nil { log.Println(log.TRACE, fmt.Sprintf("Using custom giveup duration %q for %q %q", timeout.String(), service, version)) return timeout } log.Println(log.ERROR, "Failed to parse client.timeout.total", err) } return config.DefaultRetryDuration }
/* Conn.SendTimeout() Acts like Send but takes a timeout */ func (c *Conn) SendTimeout(ri *skynet.RequestInfo, fn string, in interface{}, out interface{}, timeout time.Duration) (err error) { if c.IsClosed() { return ConnectionClosed } sin := skynet.ServiceRPCInWrite{ RequestInfo: ri, Method: fn, ClientID: c.clientID, } var b []byte b, err = bson.Marshal(in) if err != nil { return serviceError{fmt.Sprintf("Error calling bson.Marshal: %v", err)} } sin.In = bson.Binary{ 0x00, b, } sout := skynet.ServiceRPCOutRead{} // Set timeout for this request, then set it back to idle timeout c.setDeadline(timeout) defer c.setDeadline(c.idleTimeout) log.Println(log.TRACE, fmt.Sprintf("Sending Method call %s with ClientID %s to: %s", sin.Method, sin.ClientID, c.addr)) err = c.rpcClient.Call(c.serviceName+".Forward", sin, &sout) if err != nil { c.Close() err = serviceError{err.Error()} return } if sout.ErrString != "" { err = serviceError{sout.ErrString} return } err = bson.Unmarshal(sout.Out, out) if err != nil { log.Println(log.ERROR, "Error unmarshalling nested document") err = serviceError{err.Error()} } return }
func init() { flagset := flag.NewFlagSet("config", flag.ContinueOnError) flagset.StringVar(&configFile, "config", "", "Config File") flagset.StringVar(&uuid, "uuid", "", "uuid") args, _ := SplitFlagsetFromArgs(flagset, os.Args[1:]) flagset.Parse(args) // Ensure we have a UUID if uuid == "" { uuid = NewUUID() } if configFile == "" { for _, f := range defaultConfigFiles { if _, err := os.Stat(f); err == nil { configFile = f break } } } if configFile == "" { log.Println(log.ERROR, "Failed to find config file") conf = config.NewDefault() return } if _, err := os.Stat(configFile); os.IsNotExist(err) { log.Println(log.ERROR, "Config file does not exist", err) conf = config.NewDefault() return } var err error if conf, err = config.ReadDefault(configFile); err != nil { conf = config.NewDefault() log.Fatal(err) } // Set default log level from config, this can be overriden at the service level when the service is created if l, err := conf.RawStringDefault("log.level"); err == nil { log.SetLogLevel(log.LevelFromString(l)) } // Set GOMAXPROCS if i, err := conf.Int("DEFAULT", "runtime.gomaxprocs"); err == nil { runtime.GOMAXPROCS(i) } }
// Wait for existing requests to complete and shutdown service func (s *Service) shutdown() { if s.shuttingDown { return } s.shuttingDown = true s.doneGroup.Add(1) s.rpcListener.Close() s.doneChan <- true s.activeRequests.Wait() err := skynet.GetServiceManager().Remove(*s.ServiceInfo) if err != nil { log.Println(log.ERROR, "Failed to remove service: "+err.Error()) } skynet.GetServiceManager().Shutdown() s.Delegate.Stopped(s) // Call user defined callback s.doneGroup.Done() }
func (sm *ZookeeperServiceManager) Remove(s skynet.ServiceInfo) (err error) { log.Println(log.TRACE, "Removing service", s.UUID) ops := zk.MultiOps{ Delete: []zk.DeleteRequest{ deleteRequest(path.Join("/regions", s.Region, s.UUID), -1), deleteRequest(path.Join("/services", s.Name, s.Version, s.UUID), -1), deleteRequest(path.Join("/services", s.Name, s.UUID), -1), deleteRequest(path.Join("/hosts", s.ServiceAddr.IPAddress, s.UUID), -1), deleteRequest(path.Join("/instances", s.UUID, "registered"), -1), deleteRequest(path.Join("/instances", s.UUID, "name"), -1), deleteRequest(path.Join("/instances", s.UUID, "version"), -1), deleteRequest(path.Join("/instances", s.UUID, "region"), -1), deleteRequest(path.Join("/instances", s.UUID, "addr"), -1), deleteRequest(path.Join("/instances", s.UUID), -1), }, } err = sm.conn.Multi(ops) if err == nil { delete(sm.managedInstances, s.UUID) } // Attempt to remove parent paths for service if they are empty sm.removePathIfEmpty(path.Join("/hosts", s.ServiceAddr.IPAddress)) sm.removePathIfEmpty(path.Join("/regions", s.Region)) sm.removePathIfEmpty(path.Join("/services", s.Name, s.Version)) sm.removePathIfEmpty(path.Join("/services", s.Name)) return }
func (sm *ZookeeperServiceManager) Add(s skynet.ServiceInfo) (err error) { log.Println(log.TRACE, "Adding service to cluster", s.UUID) err = sm.createPathsForService(s) if err != nil { return err } ops := zk.MultiOps{ Create: []zk.CreateRequest{ createRequest(path.Join("/regions", s.Region, s.UUID), []byte{}, zk.PermAll, zk.FlagEphemeral), createRequest(path.Join("/services", s.Name, s.UUID), []byte{}, zk.PermAll, zk.FlagEphemeral), createRequest(path.Join("/services", s.Name, s.Version, s.UUID), []byte{}, zk.PermAll, zk.FlagEphemeral), createRequest(path.Join("/hosts", s.ServiceAddr.IPAddress, s.UUID), []byte{}, zk.PermAll, zk.FlagEphemeral), createRequest(path.Join("/instances", s.UUID), []byte{}, zk.PermAll, 0), createRequest(path.Join("/instances", s.UUID, "registered"), []byte(strconv.FormatBool(s.Registered)), zk.PermAll, zk.FlagEphemeral), createRequest(path.Join("/instances", s.UUID, "name"), []byte(s.Name), zk.PermAll, zk.FlagEphemeral), createRequest(path.Join("/instances", s.UUID, "version"), []byte(s.Version), zk.PermAll, zk.FlagEphemeral), createRequest(path.Join("/instances", s.UUID, "region"), []byte(s.Region), zk.PermAll, zk.FlagEphemeral), createRequest(path.Join("/instances", s.UUID, "addr"), []byte(s.ServiceAddr.String()), zk.PermAll, zk.FlagEphemeral), }, } err = sm.conn.Multi(ops) sm.managedInstances[s.UUID] = s return }
func (s *Service) listen(addr skynet.BindAddr, bindWait *sync.WaitGroup) { var err error s.rpcListener, err = addr.Listen() if err != nil { panic(err) } log.Printf(log.INFO, "%+v\n", ServiceListening{ Addr: &addr, ServiceInfo: s.ServiceInfo, }) // We may have changed port due to conflict, ensure config has the correct port now a, _ := skynet.BindAddrFromString(addr.String()) s.ServiceAddr.IPAddress = a.IPAddress s.ServiceAddr.Port = a.Port bindWait.Done() for { conn, err := s.rpcListener.AcceptTCP() if s.shuttingDown { break } if err != nil && !s.shuttingDown { log.Println(log.ERROR, "AcceptTCP failed", err) continue } s.connectionChan <- conn } }
func (sc *scodec) WriteResponse(rs *rpc.Response, v interface{}) (err error) { log.Println(log.TRACE, "RPC Server Entered: WriteResponse") defer log.Println(log.TRACE, "RPC Server Leaving: WriteResponse") err = sc.enc.Encode(rs) if err != nil { log.Println(log.ERROR, "RPC Server Error encoding rpc response: ", err) return } err = sc.enc.Encode(v) if err != nil { log.Println(log.ERROR, "RPC Server Error encoding response value: ", err) return } return sc.encBuf.Flush() }
func (sm *ZookeeperServiceManager) Unregister(uuid string) (err error) { log.Println(log.TRACE, "Unregister service", uuid) _, err = sm.conn.Set(path.Join("/instances", uuid, "registered"), []byte("false"), -1) return }
func (sm *ZookeeperServiceManager) mux() { for { select { case e := <-sm.session: switch e.Type { case zk.EventNodeDeleted, zk.EventNodeChildrenChanged, zk.EventNodeDataChanged: case zk.EventSession: // TODO: EventNotWatching // TODO: StateDisconnected default: log.Println(log.TRACE, "Zookeeper Event Received: ", e) } case <-sm.done: // Remove instances that were added by this instance for _, s := range sm.managedInstances { sm.Remove(s) } sm.cache.Stop() sm.conn.Close() return } } }
func (pc *PathCache) watchChildren() error { if pc.depth == 0 { return nil } children, _, ev, err := pc.serviceManager.conn.ChildrenW(pc.path) if err != nil { if err != zk.ErrNoNode { log.Println(log.ERROR, err) } return err } go forwardZkEvents(ev, pc.events) pc.startup.Add(len(children)) for _, c := range children { if _, ok := pc.children[path.Join(pc.path, c)]; !ok { go func(c string) { pc.addChildChan <- c }(c) } } return nil }
func (s *Service) serveAdminRequests() { rId := os.Stderr.Fd() + 2 wId := os.Stderr.Fd() + 3 pipeReader := os.NewFile(uintptr(rId), "") pipeWriter := os.NewFile(uintptr(wId), "") s.pipe = daemon.NewPipe(pipeReader, pipeWriter) b := make([]byte, daemon.MAX_PIPE_BYTES) for { n, err := s.pipe.Read(b) if err != nil { if err != io.EOF { log.Printf(log.ERROR, "Error reading from admin pipe "+err.Error()) } else { // We received EOF, ensure we shutdown (if daemon died we could be orphaned) s.Shutdown() } return } cmd := string(b[:n]) log.Println(log.TRACE, "Received "+cmd+" from daemon") switch cmd { case "SHUTDOWN": s.Shutdown() s.pipe.Write([]byte("ACK")) break case "REGISTER": s.Register() s.pipe.Write([]byte("ACK")) case "UNREGISTER": s.Unregister() s.pipe.Write([]byte("ACK")) case "LOG DEBUG", "LOG TRACE", "LOG INFO", "LOG WARN", "LOG ERROR", "LOG FATAL", "LOG PANIC": parts := strings.Split(cmd, " ") log.SetLogLevel(log.LevelFromString(parts[1])) log.Println(log.INFO, "Setting log level to "+parts[1]) s.pipe.Write([]byte("ACK")) } } }
func (cc *ccodec) WriteRequest(req *rpc.Request, v interface{}) (err error) { log.Println(log.TRACE, "RPC Server Entered: WriteRequest") defer log.Println(log.TRACE, "RPC Server Leaving: WriteRequest") err = cc.enc.Encode(req) if err != nil { log.Println(log.ERROR, "RPC Client Error enconding request rpc request: ", err) return } err = cc.enc.Encode(v) if err != nil { log.Println(log.ERROR, "RPC Client Error enconding request value: ", err) return } return cc.encBuf.Flush() }
func (cc *ccodec) ReadResponseBody(v interface{}) (err error) { log.Println(log.TRACE, "RPC Server Entered: ReadResponseBody") defer log.Println(log.TRACE, "RPC Server Leaving: ReadResponseBody") if v == nil { err = errors.New("Response object cannot be nil") if err != nil { log.Println(log.ERROR, "RPC Client Error reading response body: ", err) } return } err = cc.dec.Decode(v) if err != nil { log.Println(log.ERROR, "RPC Client Error decoding response body: ", err) } return }
func (c *InstanceCache) watch() { for { select { case n, ok := <-c.pathNotify: if !ok { return } uuid := uuidFromPath(n.Path) switch n.Type { case PathCacheAddNotification, PathCacheUpdateNotification: s, err := c.getServiceInfo(uuid) // err means not all paths exist yet if err != nil { continue } if _, ok := c.instances[uuid]; ok { log.Println(log.TRACE, "InstanceCache instance updated:", uuid) c.instances[uuid] = s go c.notify(skynet.InstanceUpdated, s) } else { log.Println(log.TRACE, "InstanceCache instance added:", uuid) c.instances[uuid] = s go c.notify(skynet.InstanceAdded, s) } case PathCacheRemoveNotification: if n.Path == path.Join(InstancesBasePath, uuid) { if s, ok := c.instances[uuid]; ok { log.Println(log.TRACE, "InstanceCache instance removed:", uuid) go c.notify(skynet.InstanceRemoved, s) delete(c.instances, uuid) } } } } } }
func getIdleTimeout(s skynet.ServiceInfo) time.Duration { if d, err := config.String(s.Name, s.Version, "client.timeout.idle"); err == nil { if timeout, err := time.ParseDuration(d); err == nil { return timeout } log.Println(log.ERROR, "Failed to parse client.timeout.idle", err) } return config.DefaultIdleTimeout }
func init() { timeout := 1 * time.Second addr := DefaultAddr log.Println(log.INFO, "In init") if a, err := config.RawStringDefault("zookeeper.addr"); err == nil { addr = a log.Println(log.INFO, "addr", a) } else { log.Fatal("Failed to parse Zookeeper addr", err) } if t, err := config.RawStringDefault("zookeeper.timeout"); err == nil { if timeout, err = time.ParseDuration(t); err != nil { log.Fatal("Failed to parse Zookeeper timeout", err) } } skynet.SetServiceManager(NewZookeeperServiceManager(addr, timeout)) }
/* Conn.performHandshake Responsible for performing handshake with service */ func (c *Conn) performHandshake() (err error) { var sh skynet.ServiceHandshake decoder := bsonrpc.NewDecoder(c.conn) err = decoder.Decode(&sh) if err != nil { log.Println(log.ERROR, "Failed to decode ServiceHandshake", err) c.conn.Close() return HandshakeFailed } if sh.Name != c.serviceName { log.Println(log.ERROR, "Attempted to send request to incorrect service: "+sh.Name) return HandshakeFailed } ch := skynet.ClientHandshake{} encoder := bsonrpc.NewEncoder(c.conn) err = encoder.Encode(ch) if err != nil { log.Println(log.ERROR, "Failed to encode ClientHandshake", err) c.conn.Close() return HandshakeFailed } if !sh.Registered { log.Println(log.ERROR, "Attempted to send request to unregistered service") return ServiceUnregistered } c.rpcClient = bsonrpc.NewClient(c.conn) c.clientID = sh.ClientID return }
func (d *Decoder) Decode(pv interface{}) (err error) { var lbuf [4]byte n, err := d.r.Read(lbuf[:]) if n == 0 { return io.EOF } if n != 4 { err = fmt.Errorf("Corrupted BSON stream: could only read %d", n) log.Println(log.ERROR, "Error decoding message (reading length): ", err) return } if err != nil { log.Println(log.ERROR, "Error decoding message (reading length): ", err) return } length := (int(lbuf[0]) << 0) | (int(lbuf[1]) << 8) | (int(lbuf[2]) << 16) | (int(lbuf[3]) << 24) buf := make([]byte, length) copy(buf[0:4], lbuf[:]) n, err = io.ReadFull(d.r, buf[4:]) if err != nil { log.Println(log.ERROR, "Error decoding message (reading message): ", err) return } if n+4 != length { err = fmt.Errorf("Expected %d bytes, read %d", length, n) log.Println(log.ERROR, "Error decoding message (reading message): ", err) } err = bson.Unmarshal(buf, pv) return }
func (c *InstanceCache) buildInitialCache() { for _, p := range c.cache.Children() { uuid := uuidFromPath(p) s, err := c.getServiceInfo(uuid) if err != nil { log.Println(log.WARN, err) continue } c.instances[uuid] = s } }