func SNSProxy(w http.ResponseWriter, r *http.Request) { log := logger.New("ns=kernel").At("SNSProxy") defer r.Body.Close() body, err := ioutil.ReadAll(r.Body) if err != nil { log.Error(err) http.Error(w, err.Error(), 500) return } var payload map[string]string err = json.Unmarshal(body, &payload) if err != nil { log.Error(err) http.Error(w, err.Error(), 500) return } url := r.FormValue("endpoint") resp, err := http.Post(url, "application/json", strings.NewReader(payload["Message"])) if err != nil { log.Error(err) http.Error(w, err.Error(), 500) return } log.Log("proxied=true status=%s", resp.Status) w.Write([]byte("ok")) }
func ClusterServices() (ECSServices, error) { var log = logger.New("ns=ClusterServices") services := ECSServices{} lsres, err := ECS().ListServices(&ecs.ListServicesInput{ Cluster: aws.String(os.Getenv("CLUSTER")), }) if err != nil { log.Log("at=ListServices err=%q", err) return services, err } dsres, err := ECS().DescribeServices(&ecs.DescribeServicesInput{ Cluster: aws.String(os.Getenv("CLUSTER")), Services: lsres.ServiceArns, }) if err != nil { log.Log("at=ListServices err=%q", err) return services, err } for i := 0; i < len(dsres.Services); i++ { services = append(services, dsres.Services[i]) } return services, nil }
func api(at string, handler ApiHandlerFunc) http.HandlerFunc { return func(rw http.ResponseWriter, r *http.Request) { log := logger.New("ns=kernel").At(at).Start() if !passwordCheck(r) { rw.Header().Set("WWW-Authenticate", `Basic realm="Convox System"`) rw.WriteHeader(401) rw.Write([]byte("invalid authorization")) return } if !versionCheck(r) { rw.WriteHeader(403) rw.Write([]byte("client outdated, please update with `convox update`")) return } err := handler(rw, r) if err != nil { log.Error(err) rollbar.Error(rollbar.ERR, err) RenderError(rw, err) return } log.Log("state=success") } }
func ws(at string, handler ApiWebsocketFunc) websocket.Handler { return websocket.Handler(func(ws *websocket.Conn) { log := logger.New("ns=kernel").At(at).Start() if !passwordCheck(ws.Request()) { ws.Write([]byte("ERROR: invalid authorization\n")) return } if !versionCheck(ws.Request()) { ws.Write([]byte("client outdated, please update with `convox update`\n")) return } err := handler(ws) if err != nil { ws.Write([]byte(fmt.Sprintf("ERROR: %v\n", err))) log.Error(err) rollbar.Error(rollbar.ERR, err) return } log.Log("state=success") }) }
// Get latest convergence state and send notifications func monitorConverged(lastConverged bool, lastEventAt time.Time) (bool, ecs.ServiceEvent) { log := logger.New("ns=services_monitor") services, err := models.ClusterServices() if err != nil { log.Log("fn=monitorConverged err=%q", err) return lastConverged, ecs.ServiceEvent{ CreatedAt: aws.Time(lastEventAt), } } converged := services.IsConverged() events := services.EventsSince(lastEventAt) log.Log("fn=monitorConverged converged=%t events=%d lastEventAt=%q", converged, len(events), lastEventAt) if events.HasCapacityWarning() { models.NotifyError("rack:capacity", fmt.Errorf(events.CapacityWarning()), map[string]string{ "rack": os.Getenv("RACK"), }) } if converged != lastConverged { models.NotifySuccess("rack:converge", map[string]string{ "rack": os.Getenv("RACK"), "converged": fmt.Sprintf("%t", converged), }) } return converged, services.LastEvent() }
func recovery(rw http.ResponseWriter, r *http.Request, next http.HandlerFunc) { defer recoverWith(func(err error) { log := logger.New("ns=kernel").At("panic") helpers.Error(log, err) http.Error(rw, err.Error(), http.StatusInternalServerError) }) next(rw, r) }
func StartCluster() { var log = logger.New("ns=cluster_monitor") defer recoverWith(func(err error) { helpers.Error(log, err) }) for _ = range time.Tick(5 * time.Minute) { log.Log("tick") instances := Instances{} err := instances.describeASG() if err != nil { log.Error(err) continue } err = instances.describeECS() if err != nil { log.Error(err) continue } // TODO: Add an instances.testDocker() call to the mission critical path // Test if ASG Instance is registered and connected in ECS cluster for _, i := range instances { if !i.ASG { // TODO: Rogue instance?! Terminate? continue } if !i.ECS { // Not registered or not connected => set Unhealthy _, err := models.AutoScaling().SetInstanceHealth( &autoscaling.SetInstanceHealthInput{ HealthStatus: aws.String("Unhealthy"), InstanceId: aws.String(i.Id), ShouldRespectGracePeriod: aws.Bool(true), }, ) i.Unhealthy = true if err != nil { log.Error(err) continue } } } log.Log(instances.log()) } }
func StartImages() { var log = logger.New("ns=app_images") if os.Getenv("DEVELOPMENT") == "true" { return } maxRetries := 5 var err error for i := 0; i < maxRetries; i++ { err := dockerLogin() if err == nil { break } time.Sleep(30 * time.Second) } if err != nil { return } apps, err := models.ListApps() if err != nil { log.Error(err) return } for _, app := range apps { a, err := models.GetApp(app.Name) if err != nil { log.Error(err) continue } for key, value := range a.Parameters { if strings.HasSuffix(key, "Image") { log.Log("cmd=%q", fmt.Sprintf("docker pull %s", value)) data, err := exec.Command("docker", "pull", value).CombinedOutput() if err != nil { fmt.Printf("%+v\n", string(data)) log.Error(err) continue } } } } }
func autoscaleRack() { log := logger.New("ns=workers.autoscale at=autoscaleRack") capacity, err := provider.CapacityGet() if err != nil { log.Log("fn=models.GetSystemCapacity err=%q", err) return } log.Log("autoscale=%t", autoscale) if !autoscale { return } system, err := provider.SystemGet() if err != nil { log.Log("fn=models.GetSystem err=%q", err) return } // calaculate instance requirements based on total process memory needed divided by the memory // on an individual instance instances := int(math.Ceil(float64(capacity.ProcessMemory) / float64(capacity.InstanceMemory))) // instance count cant be less than 2 if instances < 2 { instances = 2 } // instance count must be at least maxconcurrency+1 if instances < (int(capacity.ProcessWidth) + 1) { instances = int(capacity.ProcessWidth) + 1 } log.Log("process.memory=%d instance.memory=%d instances=%d change=%d", capacity.ProcessMemory, capacity.InstanceMemory, instances, (instances - system.Count)) // if no change then exit if system.Count == instances { return } system.Count = instances err = provider.SystemSave(*system) if err != nil { log.Log("fn=system.Save err=%q", err) return } }
func StartHeartbeat() { log := logger.New("ns=heartbeat") defer recoverWith(func(err error) { helpers.Error(log, err) }) helpers.SendMixpanelEvent("kernel-heartbeat", "") for _ = range time.Tick(1 * time.Hour) { helpers.SendMixpanelEvent("kernel-heartbeat", "") } }
func dockerLogin() error { var log = logger.New("ns=app_images") log.Log("cmd=%q", fmt.Sprintf("docker login -e [email protected] -u convox -p ***** %s", os.Getenv("REGISTRY_HOST"))) data, err := exec.Command("docker", "login", "-e", "*****@*****.**", "-u", "convox", "-p", os.Getenv("PASSWORD"), os.Getenv("REGISTRY_HOST")).CombinedOutput() if err != nil { fmt.Printf("%+v\n", string(data)) log.Error(err) } return err }
func StartHeartbeat() { log := logger.New("ns=heartbeat") defer recoverWith(func(err error) { helpers.Error(log, err) }) heartbeat() for _ = range time.Tick(1 * time.Hour) { heartbeat() } }
// EventSend publishes an important message out to the world. // // On AWS messages are published to SNS. The Rack has an HTTP endpoint that is an SNS // subscription, and when a message is delivered forwards them to all configured // webhook services. // // Often the Rack has a Console webhook which facilitates forwarding events // to Slack with additional formatting and filtering. // // Because these are important system events, they are also published to Segment // for operational metrics. func (p *AWSProvider) EventSend(e *structs.Event, err error) error { log := logger.New("ns=kernel") e.Status = "success" e.Timestamp = time.Now().UTC() if err != nil { e.Data["message"] = err.Error() e.Status = "error" } msg, err := json.Marshal(e) if err != nil { helpers.Error(log, err) // report internal errors to Rollbar return err } fmt.Printf("aws EventSend msg=%q\n", msg) // Publish Event to SNS resp, err := p.sns().Publish(&sns.PublishInput{ Message: aws.String(string(msg)), // Required Subject: aws.String(e.Action), TargetArn: aws.String(os.Getenv("NOTIFICATION_TOPIC")), }) if err != nil { helpers.Error(log, err) // report internal errors to Rollbar return err } log.At("EventSend").Log("message-id=%q", *resp.MessageId) // report event to Segment params := map[string]interface{}{ "action": e.Action, "status": e.Status, } for k, v := range e.Data { params[k] = v } helpers.TrackEvent("event", params) return nil }
// Get initial convergence state func checkConverged() (bool, ecs.ServiceEvent) { log := logger.New("ns=services_monitor") services, err := models.ClusterServices() if err != nil { log.Log("fn=checkConverged err=%q", err) return true, ecs.ServiceEvent{ CreatedAt: aws.Time(time.Now()), } } converged := services.IsConverged() lastEvent := services.LastEvent() log.Log("fn=checkConverged converged=%t lastEventAt=%q", converged, lastEvent.CreatedAt) return converged, lastEvent }
func Notify(name, status string, data map[string]string) error { if PauseNotifications { return nil } log := logger.New("ns=kernel") data["rack"] = os.Getenv("RACK") event := &client.NotifyEvent{ Action: name, Status: status, Data: data, Timestamp: time.Now().UTC(), } message, err := json.Marshal(event) if err != nil { return err } fmt.Printf("models EventSend msg=%q\n", message) params := &sns.PublishInput{ Message: aws.String(string(message)), // Required Subject: aws.String(name), TargetArn: aws.String(NotificationTopic), } resp, err := SNS().Publish(params) if err != nil { return err } log.At("Notify").Log("message-id=%q", *resp.MessageId) return nil }
func SNSConfirm(w http.ResponseWriter, r *http.Request) { log := logger.New("ns=kernel").At("SNSConfirm") defer r.Body.Close() body, err := ioutil.ReadAll(r.Body) if err != nil { log.Error(err) http.Error(w, err.Error(), 500) return } var payload map[string]string err = json.Unmarshal(body, &payload) if err != nil { log.Error(err) http.Error(w, err.Error(), 500) return } params := &sns.ConfirmSubscriptionInput{ Token: aws.String(payload["Token"]), TopicArn: aws.String(payload["TopicArn"]), } resp, err := models.SNS().ConfirmSubscription(params) if err != nil { log.Error(err) http.Error(w, err.Error(), 500) return } log.Log("confirmed=true subscriptionArn=%q", *resp.SubscriptionArn) w.Write([]byte("ok")) }
import (
	"encoding/json"
	"fmt"
	"io/ioutil"
	"net/http"
	"os"
	"time"

	"github.com/codegangsta/negroni"
	"github.com/ddollar/logger"
	"github.com/ddollar/nlogger"
	"github.com/gorilla/mux"
)

// Package-level state.
var (
	// log is the package logger, tagged ns=dashboard.
	log = logger.New("ns=dashboard")
	// org is read from the ORG environment variable at start-up.
	org = os.Getenv("ORG")
	// port is initialised to "5000".
	port = "5000"
)

// Repository holds the counters decoded from the "forks_count" and
// "stargazers_count" fields of a JSON object.
type Repository struct {
	Forks int `json:"forks_count"`
	Stars int `json:"stargazers_count"`
}

// Repositories is a list of Repository values.
type Repositories []Repository

// init registers the "index" template.
func init() {
	RegisterTemplate("index")
}
func StartCluster() { var log = logger.New("ns=cluster_monitor") defer recoverWith(func(err error) { helpers.Error(log, err) }) Tick: for _ = range time.Tick(5 * time.Minute) { log.Log("tick") // Ger Rack InstanceCount Parameter instanceCount := 0 // instanceType := "unknown" res, err := models.CloudFormation().DescribeStacks( &cloudformation.DescribeStacksInput{ StackName: aws.String(os.Getenv("RACK")), }, ) if err != nil { log.Error(err) continue } for _, p := range res.Stacks[0].Parameters { if *p.ParameterKey == "InstanceCount" { c, err := strconv.Atoi(*p.ParameterValue) if err != nil { log.Error(err) break Tick } instanceCount = c } // if *p.ParameterKey == "InstanceType" { // instanceType = *p.ParameterValue // } } // helpers.SendMixpanelEvent("kernel-cluster-monitor", fmt.Sprintf("count=%d type=%s", instanceCount, instanceType)) // List and Describe ECS Container Instances ires, err := models.ECS().ListContainerInstances( &ecs.ListContainerInstancesInput{ Cluster: aws.String(os.Getenv("CLUSTER")), }, ) if err != nil { log.Error(err) continue } dres, err := models.ECS().DescribeContainerInstances( &ecs.DescribeContainerInstancesInput{ Cluster: aws.String(os.Getenv("CLUSTER")), ContainerInstances: ires.ContainerInstanceArns, }, ) if err != nil { log.Error(err) continue } cInstanceIds := make([]string, 0) cInstanceConnections := make(map[string]bool) for _, i := range dres.ContainerInstances { cInstanceConnections[*i.Ec2InstanceId] = *i.AgentConnected if *i.AgentConnected { cInstanceIds = append(cInstanceIds, *i.Ec2InstanceId) } } // Get and Describe Rack ASG Resource resources, err := models.ListResources(os.Getenv("RACK")) ares, err := models.AutoScaling().DescribeAutoScalingGroups( &autoscaling.DescribeAutoScalingGroupsInput{ AutoScalingGroupNames: []*string{ aws.String(resources["Instances"].Id), }, }, ) if err != nil { log.Error(err) continue } // Test if ASG Instance is registered and connected in ECS cluster aInstanceIds := []string{} uInstanceIds := 
[]string{} for _, i := range ares.AutoScalingGroups[0].Instances { if connected, exists := cInstanceConnections[*i.InstanceId]; connected && exists { aInstanceIds = append(aInstanceIds, *i.InstanceId) } else { // Not registered or not connected => set Unhealthy if *i.LifecycleState == "InService" { _, err := models.AutoScaling().SetInstanceHealth( &autoscaling.SetInstanceHealthInput{ HealthStatus: aws.String("Unhealthy"), InstanceId: aws.String(*i.InstanceId), ShouldRespectGracePeriod: aws.Bool(true), }, ) if err != nil { log.Error(err) continue } uInstanceIds = append(uInstanceIds, *i.InstanceId) } } } sort.Strings(aInstanceIds) sort.Strings(cInstanceIds) sort.Strings(uInstanceIds) // if len(uInstanceIds) > 0 { // helpers.SendMixpanelEvent("kernel-cluster-monitor-mark", strings.Join(uInstanceIds, ",")) // } log.Log("InstanceCount=%v connected='%v' healthy='%v' marked='%s'", instanceCount, strings.Join(cInstanceIds, ","), strings.Join(aInstanceIds, ","), strings.Join(uInstanceIds, ",")) } }