// heartbeat records that the member is still alive
func (r Redis) heartbeat() {
	tick := time.Tick(beat)

	// the write timeout is set in the connection pool, so each 'beat' verifies
	// that we can still talk to redis (detecting a network partition) rather
	// than creating a new connection
	conn := pool.Get()
	defer conn.Close()

	for range tick {
		config.Log.Trace("[cluster] - Heartbeat...")
		_, err := conn.Do("SET", self, "alive", "EX", ttl)
		if err != nil {
			// close explicitly; deferred closes don't run on os.Exit
			conn.Close()
			config.Log.Error("[cluster] - Failed to heartbeat - %v", err)
			// clear balancer rules ("stop balancing if we are 'dead'")
			balance.SetServices(make([]core.Service, 0))
			os.Exit(1)
		}

		// re-add ourself to the member list (just in case)
		_, err = conn.Do("SADD", "members", self)
		if err != nil {
			conn.Close()
			config.Log.Error("[cluster] - Failed to add myself to list of members - %v", err)
			// clear balancer rules ("stop balancing if we are 'dead'")
			balance.SetServices(make([]core.Service, 0))
			os.Exit(1)
		}
	}
}
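// heartbeat() leans on the pool's write timeout to detect a partition: a
// 'beat' that cannot be written fails fast instead of hanging. Below is a
// minimal sketch of how such a pool might be configured with redigo; the
// function name, address, sizes, and durations are illustrative assumptions,
// not the original configuration.
func newPoolSketch(addr string) *redis.Pool {
	return &redis.Pool{
		MaxIdle:     2,
		IdleTimeout: 30 * time.Second,
		Dial: func() (redis.Conn, error) {
			// the write timeout is what turns each heartbeat SET into a
			// liveness probe for the connection itself
			return redis.Dial("tcp", addr,
				redis.DialConnectTimeout(5*time.Second),
				redis.DialReadTimeout(5*time.Second),
				redis.DialWriteTimeout(5*time.Second))
		},
	}
}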
// SetServices applies the full service list to the balancer, persists it to
// the backend, and rolls the balancer back if persisting fails
func SetServices(services []core.Service) error {
	// snapshot the current services in case we need to roll back
	oldServices, err := database.GetServices()
	if err != nil {
		return err
	}

	// apply services to the balancer
	err = balance.SetServices(services)
	if err != nil {
		return err
	}

	if !database.CentralStore {
		// save to backend
		err = database.SetServices(services)
		if err != nil {
			// undo the balance action
			if uerr := balance.SetServices(oldServices); uerr != nil {
				err = fmt.Errorf("%v - %v", err.Error(), uerr.Error())
			}
			return err
		}
	}
	return nil
}
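// SetServices applies state in memory first and persists second, restoring
// the snapshot when persistence fails. The same pattern in isolation; every
// name here is hypothetical, only the control flow mirrors the original:
func applyThenPersist(apply, persist func([]core.Service) error, old, next []core.Service) error {
	if err := apply(next); err != nil {
		return err
	}
	if err := persist(next); err != nil {
		// roll the in-memory state back so it matches the backend
		if uerr := apply(old); uerr != nil {
			err = fmt.Errorf("%v - %v", err, uerr)
		}
		return err
	}
	return nil
}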
func TestGetServiceNginx(t *testing.T) {
	if !nginxPrep() {
		t.SkipNow()
	}

	// getting a service that was never set should fail
	_, err := balance.GetService(testService2.Id)
	if err == nil {
		t.Errorf("Failed to fail GETTING service")
		t.FailNow()
	}

	if err := balance.SetServices([]core.Service{testService2}); err != nil {
		t.Errorf("Failed to SET services - %v", err)
		t.FailNow()
	}

	service, err := balance.GetService(testService2.Id)
	if err != nil {
		t.Errorf("Failed to GET service - %v", err)
		t.FailNow()
	}

	if service.Id != testService2.Id {
		t.Errorf("Read service differs from written service")
	}
}
func TestSetServicesNginx(t *testing.T) {
	if !nginxPrep() {
		t.SkipNow()
	}

	if err := balance.SetService(&testService1); err != nil {
		t.Errorf("Failed to SET service - %v", err)
		t.FailNow()
	}

	if err := balance.SetServices([]core.Service{testService2}); err != nil {
		t.Errorf("Failed to SET services - %v", err)
		t.FailNow()
	}

	// setting the full list should have cleared the previously set service
	_, err := balance.GetService(testService1.Id)
	if err == nil {
		t.Errorf("Failed to clear old services on PUT")
		t.FailNow()
	}

	service, err := balance.GetService(testService2.Id)
	if err != nil {
		t.Error(err)
		t.FailNow()
	}

	if service.Host != testService2.Host {
		t.Errorf("Read service differs from written service")
	}
}
// cleanup removes members whose heartbeat key has expired (no heartbeat seen
// within ttl seconds)
func (r Redis) cleanup() {
	// cycle every second to check for dead members
	tick := time.Tick(time.Second)

	conn := pool.Get()
	defer conn.Close()

	for range tick {
		// get the list of members that should be alive
		members, err := redis.Strings(conn.Do("SMEMBERS", "members"))
		if err != nil {
			config.Log.Error("[cluster] - Failed to reach redis for cleanup - %v", err)
			// clear balancer rules ("stop balancing if we are 'dead'")
			balance.SetServices(make([]core.Service, 0))
			os.Exit(1)
		}

		for _, member := range members {
			// if the member's heartbeat key expired, remove it from the member set
			exist, _ := redis.Int(conn.Do("EXISTS", member))
			if exist == 0 {
				conn.Do("SREM", "members", member)
				config.Log.Info("[cluster] - Member '%v' assumed dead. Removed.", member)
			}
		}
	}
}
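// The liveness contract between heartbeat() and cleanup(): heartbeat
// refreshes a per-member key with SET <member> alive EX <ttl>, so a member is
// alive exactly as long as that key exists. A hypothetical helper making the
// check explicit (not part of the original source):
func isAlive(conn redis.Conn, member string) (bool, error) {
	exists, err := redis.Int(conn.Do("EXISTS", member))
	if err != nil {
		return false, err
	}
	return exists == 1, nil
}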
func TestSetServices(t *testing.T) {
	if skip {
		t.SkipNow()
	}

	services := []core.Service{testService2}

	if err := balance.SetServices(services); err != nil {
		t.Errorf("Failed to SET services - %v", err)
		t.FailNow()
	}

	// setting the full list should have removed the old service's file
	if _, err := os.Stat("/tmp/scribbleTest/services/tcp-192_168_0_15-80.json"); !os.IsNotExist(err) {
		t.Errorf("Failed to clear old services on PUT - %v", err)
	}

	// todo: read from ipvsadm
	service, err := balance.GetService(testService2.Id)
	if err != nil {
		t.Error(err)
	}

	if service.Host != testService2.Host {
		t.Errorf("Read service differs from written service")
	}
}
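// The path asserted above suggests scribble stores each service as
// "<type>-<host with dots as underscores>-<port>.json". A hypothetical helper
// showing that derivation (inferred from the test path, not taken from the
// source; the Type and Port fields are assumptions):
func svcFileName(svc core.Service) string {
	return fmt.Sprintf("%s-%s-%d.json", svc.Type, strings.Replace(svc.Host, ".", "_", -1), svc.Port)
}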
// sigHandle traps SIGINT/SIGTERM, clearing balancer rules and vips before exiting
func sigHandle() {
	sigs := make(chan os.Signal, 1)
	signal.Notify(sigs, syscall.SIGINT, syscall.SIGTERM)

	go func() {
		<-sigs
		// clear balancer rules (stop balancing if we are offline)
		balance.SetServices(make([]core.Service, 0))
		// clear vips
		vipmgr.SetVips(make([]core.Vip, 0))
		fmt.Println()
		os.Exit(0)
	}()
}
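// A minimal sketch of wiring the handler at startup; this main and its
// blocking pattern are illustrative assumptions, only sigHandle is from the
// source:
func main() {
	sigHandle()
	// ... start the api, cluster, and balancer here ...
	select {} // block; sigHandle's goroutine exits the process on SIGINT/SIGTERM
}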
// subscribe listens on the portal channel and acts on messages received
func (r Redis) subscribe() {
	config.Log.Info("[cluster] - Redis subscribing on %s...", config.ClusterConnection)

	// listen for published messages
	for {
		switch v := r.subconn.Receive().(type) {
		case redis.Message:
			pdata := strings.FieldsFunc(string(v.Data), keepSubstrings)
			if len(pdata) == 0 {
				config.Log.Error("[cluster] - Empty message received")
				break
			}

			switch pdata[0] {

			// SERVICES ///////////////////////////////////////////////////////////
			case "get-services":
				if len(pdata) != 2 {
					config.Log.Error("[cluster] - member not passed in message")
					break
				}
				member := pdata[1]
				if member == self {
					svcs, err := common.GetServices()
					if err != nil {
						config.Log.Error("[cluster] - Failed to get services - %v", err.Error())
						break
					}
					services, err := json.Marshal(svcs)
					if err != nil {
						config.Log.Error("[cluster] - Failed to marshal services - %v", err.Error())
						break
					}
					config.Log.Debug("[cluster] - get-services requested, publishing my services")
					conn := pool.Get()
					conn.Do("PUBLISH", "services", string(services))
					conn.Close()
				}
			case "set-services":
				if len(pdata) != 2 {
					config.Log.Error("[cluster] - services not passed in message")
					break
				}
				services, err := marshalSvcs([]byte(pdata[1]))
				if err != nil {
					config.Log.Error("[cluster] - Failed to marshal services - %v", err.Error())
					break
				}
				err = common.SetServices(*services)
				if err != nil {
					config.Log.Error("[cluster] - Failed to set services - %v", err.Error())
					break
				}
				// record that this member applied the action
				actionHash := fmt.Sprintf("%x", md5.Sum([]byte(fmt.Sprintf("set-services %s", *services))))
				config.Log.Trace("[cluster] - set-services hash - %v", actionHash)
				conn := pool.Get()
				conn.Do("SADD", actionHash, self)
				conn.Close()
				config.Log.Debug("[cluster] - set-services successful")
			case "set-service":
				if len(pdata) != 2 {
					// shouldn't happen unless redis is not secure and someone publishes manually
					config.Log.Error("[cluster] - service not passed in message")
					break
				}
				svc, err := marshalSvc([]byte(pdata[1]))
				if err != nil {
					config.Log.Error("[cluster] - Failed to marshal service - %v", err.Error())
					break
				}
				err = common.SetService(svc)
				if err != nil {
					config.Log.Error("[cluster] - Failed to set service - %v", err.Error())
					break
				}
				actionHash := fmt.Sprintf("%x", md5.Sum([]byte(fmt.Sprintf("set-service %s", *svc)))))
				config.Log.Trace("[cluster] - set-service hash - %v", actionHash)
				conn := pool.Get()
				conn.Do("SADD", actionHash, self)
				conn.Close()
				config.Log.Debug("[cluster] - set-service successful")
			case "delete-service":
				if len(pdata) != 2 {
					config.Log.Error("[cluster] - service id not passed in message")
					break
				}
				svcId := pdata[1]
				err := common.DeleteService(svcId)
				if err != nil {
					config.Log.Error("[cluster] - Failed to delete service - %v", err.Error())
					break
				}
				actionHash := fmt.Sprintf("%x", md5.Sum([]byte(fmt.Sprintf("delete-service %s", svcId))))
				config.Log.Trace("[cluster] - delete-service hash - %v", actionHash)
				conn := pool.Get()
				conn.Do("SADD", actionHash, self)
				conn.Close()
				config.Log.Debug("[cluster] - delete-service successful")

			// SERVERS ////////////////////////////////////////////////////////////
			case "set-servers":
				if len(pdata) != 3 {
					config.Log.Error("[cluster] - servers or service id not passed in message")
					break
				}
				svcId := pdata[2]
				servers, err := marshalSrvs([]byte(pdata[1]))
				if err != nil {
					config.Log.Error("[cluster] - Failed to marshal servers - %v", err.Error())
					break
				}
				err = common.SetServers(svcId, *servers)
				if err != nil {
					config.Log.Error("[cluster] - Failed to set servers - %v", err.Error())
					break
				}
				actionHash := fmt.Sprintf("%x", md5.Sum([]byte(fmt.Sprintf("set-servers %s %s", *servers, svcId))))
				config.Log.Trace("[cluster] - set-servers hash - %v", actionHash)
				conn := pool.Get()
				conn.Do("SADD", actionHash, self)
				conn.Close()
				config.Log.Debug("[cluster] - set-servers successful")
			case "set-server":
				if len(pdata) != 3 {
					// shouldn't happen unless redis is not secure and someone publishes manually
					config.Log.Error("[cluster] - server or service id not passed in message")
					break
				}
				svcId := pdata[2]
				server, err := marshalSrv([]byte(pdata[1]))
				if err != nil {
					config.Log.Error("[cluster] - Failed to marshal server - %v", err.Error())
					break
				}
				err = common.SetServer(svcId, server)
				if err != nil {
					config.Log.Error("[cluster] - Failed to set server - %v", err.Error())
					break
				}
				actionHash := fmt.Sprintf("%x", md5.Sum([]byte(fmt.Sprintf("set-server %s %s", *server, svcId))))
				config.Log.Trace("[cluster] - set-server hash - %v", actionHash)
				conn := pool.Get()
				conn.Do("SADD", actionHash, self)
				conn.Close()
				config.Log.Debug("[cluster] - set-server successful")
			case "delete-server":
				if len(pdata) != 3 {
					config.Log.Error("[cluster] - server or service id not passed in message")
					break
				}
				srvId := pdata[1]
				svcId := pdata[2]
				err := common.DeleteServer(svcId, srvId)
				if err != nil {
					config.Log.Error("[cluster] - Failed to delete server - %v", err.Error())
					break
				}
				actionHash := fmt.Sprintf("%x", md5.Sum([]byte(fmt.Sprintf("delete-server %s %s", srvId, svcId))))
				config.Log.Trace("[cluster] - delete-server hash - %v", actionHash)
				conn := pool.Get()
				conn.Do("SADD", actionHash, self)
				conn.Close()
				config.Log.Debug("[cluster] - delete-server successful")

			// ROUTES /////////////////////////////////////////////////////////////
			case "get-routes":
				if len(pdata) != 2 {
					config.Log.Error("[cluster] - member not passed in message")
					break
				}
				member := pdata[1]
				if member == self {
					rts, err := common.GetRoutes()
					if err != nil {
						config.Log.Error("[cluster] - Failed to get routes - %v", err.Error())
						break
					}
					routes, err := json.Marshal(rts)
					if err != nil {
						config.Log.Error("[cluster] - Failed to marshal routes - %v", err.Error())
						break
					}
					config.Log.Debug("[cluster] - get-routes requested, publishing my routes")
					conn := pool.Get()
					conn.Do("PUBLISH", "routes", string(routes))
					conn.Close()
				}
			case "set-routes":
				if len(pdata) != 2 {
					config.Log.Error("[cluster] - routes not passed in message")
					break
				}
				var routes []core.Route
				err := parseBody([]byte(pdata[1]), &routes)
				if err != nil {
					config.Log.Error("[cluster] - Failed to marshal routes - %v", err.Error())
					break
				}
				err = common.SetRoutes(routes)
				if err != nil {
					config.Log.Error("[cluster] - Failed to set routes - %v", err.Error())
					break
				}
				actionHash := fmt.Sprintf("%x", md5.Sum([]byte(fmt.Sprintf("set-routes %s", routes))))
				config.Log.Trace("[cluster] - set-routes hash - %v", actionHash)
				conn := pool.Get()
				conn.Do("SADD", actionHash, self)
				conn.Close()
				config.Log.Debug("[cluster] - set-routes successful")
			case "set-route":
				if len(pdata) != 2 {
					// shouldn't happen unless redis is not secure and someone publishes manually
					config.Log.Error("[cluster] - route not passed in message")
					break
				}
				var rte core.Route
				err := parseBody([]byte(pdata[1]), &rte)
				if err != nil {
					config.Log.Error("[cluster] - Failed to marshal route - %v", err.Error())
					break
				}
				err = common.SetRoute(rte)
				if err != nil {
					config.Log.Error("[cluster] - Failed to set route - %v", err.Error())
					break
				}
				actionHash := fmt.Sprintf("%x", md5.Sum([]byte(fmt.Sprintf("set-route %s", rte))))
				config.Log.Trace("[cluster] - set-route hash - %v", actionHash)
				conn := pool.Get()
				conn.Do("SADD", actionHash, self)
				conn.Close()
				config.Log.Debug("[cluster] - set-route successful")
			case "delete-route":
				if len(pdata) != 2 {
					config.Log.Error("[cluster] - route not passed in message")
					break
				}
				var rte core.Route
				err := parseBody([]byte(pdata[1]), &rte)
				if err != nil {
					config.Log.Error("[cluster] - Failed to marshal route - %v", err.Error())
					break
				}
				err = common.DeleteRoute(rte)
				if err != nil {
					config.Log.Error("[cluster] - Failed to delete route - %v", err.Error())
					break
				}
				actionHash := fmt.Sprintf("%x", md5.Sum([]byte(fmt.Sprintf("delete-route %s", rte))))
				config.Log.Trace("[cluster] - delete-route hash - %v", actionHash)
				conn := pool.Get()
				conn.Do("SADD", actionHash, self)
				conn.Close()
				config.Log.Debug("[cluster] - delete-route successful")

			// CERTS //////////////////////////////////////////////////////////////
			case "get-certs":
				if len(pdata) != 2 {
					config.Log.Error("[cluster] - member not passed in message")
					break
				}
				member := pdata[1]
				if member == self {
					crts, err := common.GetCerts()
					if err != nil {
						config.Log.Error("[cluster] - Failed to get certs - %v", err.Error())
						break
					}
					certs, err := json.Marshal(crts)
					if err != nil {
						config.Log.Error("[cluster] - Failed to marshal certs - %v", err.Error())
						break
					}
					config.Log.Debug("[cluster] - get-certs requested, publishing my certs")
					conn := pool.Get()
					conn.Do("PUBLISH", "certs", string(certs))
					conn.Close()
				}
			case "set-certs":
				if len(pdata) != 2 {
					config.Log.Error("[cluster] - certs not passed in message")
					break
				}
				var certs []core.CertBundle
				err := parseBody([]byte(pdata[1]), &certs)
				if err != nil {
					config.Log.Error("[cluster] - Failed to marshal certs - %v", err.Error())
					break
				}
				err = common.SetCerts(certs)
				if err != nil {
					config.Log.Error("[cluster] - Failed to set certs - %v", err.Error())
					break
				}
				actionHash := fmt.Sprintf("%x", md5.Sum([]byte(fmt.Sprintf("set-certs %s", certs))))
				config.Log.Trace("[cluster] - set-certs hash - %v", actionHash)
				conn := pool.Get()
				conn.Do("SADD", actionHash, self)
				conn.Close()
				config.Log.Debug("[cluster] - set-certs successful")
			case "set-cert":
				if len(pdata) != 2 {
					// shouldn't happen unless redis is not secure and someone publishes manually
					config.Log.Error("[cluster] - cert not passed in message")
					break
				}
				var crt core.CertBundle
				err := parseBody([]byte(pdata[1]), &crt)
				if err != nil {
					config.Log.Error("[cluster] - Failed to marshal cert - %v", err.Error())
					break
				}
				err = common.SetCert(crt)
				if err != nil {
					config.Log.Error("[cluster] - Failed to set cert - %v", err.Error())
					break
				}
				actionHash := fmt.Sprintf("%x", md5.Sum([]byte(fmt.Sprintf("set-cert %s", crt))))
				config.Log.Trace("[cluster] - set-cert hash - %v", actionHash)
				conn := pool.Get()
				conn.Do("SADD", actionHash, self)
				conn.Close()
				config.Log.Debug("[cluster] - set-cert successful")
			case "delete-cert":
				if len(pdata) != 2 {
					config.Log.Error("[cluster] - cert not passed in message")
					break
				}
				var crt core.CertBundle
				err := parseBody([]byte(pdata[1]), &crt)
				if err != nil {
					config.Log.Error("[cluster] - Failed to marshal cert - %v", err.Error())
					break
				}
				err = common.DeleteCert(crt)
				if err != nil {
					config.Log.Error("[cluster] - Failed to delete cert - %v", err.Error())
					break
				}
				actionHash := fmt.Sprintf("%x", md5.Sum([]byte(fmt.Sprintf("delete-cert %s", crt))))
				config.Log.Trace("[cluster] - delete-cert hash - %v", actionHash)
				conn := pool.Get()
				conn.Do("SADD", actionHash, self)
				conn.Close()
				config.Log.Debug("[cluster] - delete-cert successful")
			default:
				config.Log.Error("[cluster] - Received unknown data on %v: %v", v.Channel, string(v.Data))
			}
		case error:
			config.Log.Error("[cluster] - Subscriber failed to receive - %v", v.Error())
			if strings.Contains(v.Error(), "closed network connection") {
				// clear balancer rules ("stop balancing if we are 'dead'")
				balance.SetServices(make([]core.Service, 0))
				// exit so we don't get spammed with logs
				os.Exit(1)
			}
		}
	}
}