func (b beanstalkdFactory) Handler(f func(*Message), names ...string) (Handler, error) { name := "default" if len(names) > 0 { name = names[0] } return &executor{ inner: func() { if message, err := get(5e9, names...); err == nil { log.Debugf("Dispatching %q message to handler function.", message.Action) go func(m *Message) { f(m) if m.fail { q := beanstalkdQ{name: name} q.Put(m, 0) } }(message) } else { log.Debugf("Failed to get message from the queue: %s. Trying again...", err) if e, ok := err.(*net.OpError); ok && e.Op == "dial" { time.Sleep(5e9) } } }, }, nil }
// Execute executes the pipeline. // // The execution starts in the forward phase, calling the Forward function of // all actions. If none of the Forward calls return error, the pipeline // execution ends in the forward phase and is "committed". // // If any of the Forward call fail, the executor switches to the backward phase // (roll back) and call the Backward function for each action completed. It // does not call the Backward function of the action that has failed. // // After rolling back all completed actions, it returns the original error // returned by the action that failed. func (p *Pipeline) Execute(params ...interface{}) error { var ( r Result err error ) if len(p.actions) == 0 { return errors.New("No actions to execute.") } fwCtx := FWContext{Params: params} for i, a := range p.actions { log.Debugf("[pipeline] running the Forward for the %s action", a.Name) if a.Forward == nil { err = errors.New("All actions must define the forward function.") } else if len(fwCtx.Params) < a.MinParams { err = errors.New("Not enough parameters to call Action.Forward.") } else { r, err = a.Forward(fwCtx) a.rMutex.Lock() a.result = r a.rMutex.Unlock() fwCtx.Previous = r } if err != nil { log.Debugf("[pipeline] error running the Forward for the %s action - %s", a.Name, err.Error()) p.rollback(i-1, params) return err } } return nil }
func (factory redismqQFactory) Handler(f func(*Message), names ...string) (Handler, error) { name := "default" if len(names) > 0 { name = names[0] } consumerName := fmt.Sprintf("handler-%d", time.Now().UnixNano()) queue, err := factory.get(name, consumerName) if err != nil { return nil, err } return &executor{ inner: func() { if message, err := queue.Get(5e9); err == nil { log.Debugf("Dispatching %q message to handler function.", message.Action) go func(m *Message) { f(m) if m.fail { queue.Put(m, 0) } }(message) } else { log.Debugf("Failed to get message from the queue: %s. Trying again...", err) if e, ok := err.(*net.OpError); ok && e.Op == "dial" { time.Sleep(5e9) } } }, }, nil }
// Flatten finds the images that need to be flattened and export/import // them in order to flatten them and logs errors when they happen. func Flatten(a provision.App) { if needsFlatten(a) { image := getImage(a) log.Debugf("Flatten: attempting to flatten image %s.", image) if err := flatten(image); err != nil { log.Errorf("Flatten: Caugh error while flattening image %s: %s", image, err.Error()) return } log.Debugf("Flatten: successfully flattened image %s.", image) } }
// commit commits an image in docker based in the container // and returns the image repository. func (c *container) commit() (string, error) { log.Debugf("commiting container %s", c.ID) repository := assembleImageName(c.AppName) opts := dclient.CommitContainerOptions{Container: c.ID, Repository: repository} image, err := dockerCluster().CommitContainer(opts) if err != nil { log.Errorf("Could not commit docker image: %s", err) return "", err } log.Debugf("image %s generated from container %s", image.ID, c.ID) replicateImage(repository) return repository, nil }
// Heal iterates through all juju machines verifying if // a juju-machine-agent is down and heal these machines. func (h instanceMachineHealer) Heal() error { p := JujuProvisioner{} output, _ := p.getOutput() for _, machine := range output.Machines { if machine.AgentState == "down" { log.Debugf("Healing juju-machine-agent in machine %s", machine.InstanceID) upStartCmd("stop", "juju-machine-agent", machine.IPAddress) upStartCmd("start", "juju-machine-agent", machine.IPAddress) } else { log.Debugf("juju-machine-agent for machine %s needs no cure, skipping...", machine.InstanceID) } } return nil }
func collectUnit(container container, units chan<- provision.Unit, wg *sync.WaitGroup) { defer wg.Done() unit := provision.Unit{ Name: container.ID, AppName: container.AppName, Type: container.Type, } if container.Status == "error" { unit.Status = provision.StatusDown units <- unit return } if container.Status == "running" { unit.Ip = container.HostAddr if ip, hostPort, err := container.networkInfo(); err == nil && (hostPort != container.HostPort || ip != container.IP) { err = fixContainer(&container, ip, hostPort) if err != nil { log.Errorf("error on fix container hostport for [container %s]", container.ID) return } } addr := strings.Replace(container.getAddress(), "http://", "", 1) conn, err := net.Dial("tcp", addr) if err != nil { unit.Status = provision.StatusUnreachable } else { conn.Close() unit.Status = provision.StatusStarted } log.Debugf("collected data for [container %s] - [app %s]", container.ID, container.AppName) units <- unit } }
func (c *Client) Bind(instance *ServiceInstance, app bind.App, unit bind.Unit) (map[string]string, error) { log.Debugf("Calling bind of instance %q and unit %q at %q API", instance.Name, unit.GetIp(), instance.ServiceName) var resp *http.Response params := map[string][]string{ "unit-host": {unit.GetIp()}, "app-host": {app.GetIp()}, } resp, err := c.issueRequest("/resources/"+instance.Name, "POST", params) if err != nil { if m, _ := regexp.MatchString("", err.Error()); m { return nil, fmt.Errorf("%s api is down.", instance.Name) } return nil, err } if err == nil && resp.StatusCode < 300 { var result map[string]string err = c.jsonFromResponse(resp, &result) if err != nil { return nil, err } return result, nil } if resp.StatusCode == http.StatusPreconditionFailed { return nil, &errors.HTTP{Code: resp.StatusCode, Message: "You cannot bind any app to this service instance because it is not ready yet."} } msg := "Failed to bind instance " + instance.Name + " to the unit " + unit.GetIp() + ": " + c.buildErrorMessage(err, resp) log.Error(msg) return nil, &errors.HTTP{Code: http.StatusInternalServerError, Message: msg} }
func (h elbInstanceHealer) checkInstances(names []string) ([]elbInstance, error) { if elbSupport, _ := config.GetBool("juju:use-elb"); !elbSupport { return nil, nil } lbs, err := h.describeLoadBalancers(names) if err != nil { return nil, err } var unhealthy []elbInstance description := "Instance has failed at least the UnhealthyThreshold number of health checks consecutively." state := "OutOfService" reasonCode := "Instance" for _, lb := range lbs { instances, err := h.describeInstancesHealth(lb) if err != nil { return nil, err } for _, instance := range instances { if instance.description == description && instance.state == state && instance.reasonCode == reasonCode { unhealthy = append(unhealthy, instance) } } } log.Debugf("Found %d unhealthy instances.", len(unhealthy)) return unhealthy, nil }
func (h *healer) heal() error { log.Debugf("healing tsuru healer with endpoint %s...", h.url) r, err := request("GET", h.url, nil) if err == nil { r.Body.Close() } return err }
// runCmd executes commands and log the given stdout and stderror. func runCmd(cmd string, args ...string) (string, error) { out := bytes.Buffer{} err := executor().Execute(cmd, args, nil, &out, &out) log.Debugf("running the cmd: %s with the args: %s", cmd, args) if err != nil { return "", &cmdError{cmd: cmd, args: args, err: err, out: out.String()} } return out.String(), nil }
func (p *Pipeline) rollback(index int, params []interface{}) { bwCtx := BWContext{Params: params} for i := index; i >= 0; i-- { log.Debugf("[pipeline] running Backward for %s action", p.actions[i].Name) if p.actions[i].Backward != nil { bwCtx.FWResult = p.actions[i].result p.actions[i].Backward(bwCtx) } } }
// getPrivateDns returns the private dns for an instance. func (h *instanceAgentsConfigHealer) getPrivateDns(instanceId string) (string, error) { log.Debugf("getting dns for %s", instanceId) resp, err := h.ec2().Instances([]string{instanceId}, nil) if err != nil { log.Errorf("error in gettings dns for %s", instanceId) log.Error(err.Error()) return "", err } dns := resp.Reservations[0].Instances[0].PrivateDNSName return dns, nil }
func (p *dockerProvisioner) Restart(app provision.App) error { containers, err := listAppContainers(app.GetName()) if err != nil { log.Errorf("Got error while getting app containers: %s", err) return err } var buf bytes.Buffer for _, c := range containers { err = c.ssh(&buf, &buf, "/var/lib/tsuru/restart") if err != nil { log.Errorf("Failed to restart %q: %s.", app.GetName(), err) log.Debug("Command outputs:") log.Debugf("out: %s", &buf) log.Debugf("err: %s", &buf) return err } buf.Reset() } return nil }
// Clone runs a git clone to clone the app repository in an app. func clone(p provision.Provisioner, app provision.App) ([]byte, error) { var buf bytes.Buffer path, err := repository.GetPath() if err != nil { return nil, fmt.Errorf("Tsuru is misconfigured: %s", err) } cmd := fmt.Sprintf("git clone %s %s --depth 1", repository.ReadOnlyURL(app.GetName()), path) err = p.ExecuteCommand(&buf, &buf, app, cmd) b := buf.Bytes() log.Debugf(`"git clone" output: %s`, b) return b, err }
// fetch runs a git fetch to update the code in the app. // // It works like Clone, fetching from the app remote repository. func fetch(p provision.Provisioner, app provision.App) ([]byte, error) { var buf bytes.Buffer path, err := repository.GetPath() if err != nil { return nil, fmt.Errorf("Tsuru is misconfigured: %s", err) } cmd := fmt.Sprintf("cd %s && git fetch origin", path) err = p.ExecuteCommand(&buf, &buf, app, cmd) b := buf.Bytes() log.Debugf(`"git fetch" output: %s`, b) return b, err }
func buildResult(maxSize int, units <-chan provision.Unit) <-chan []provision.Unit { ch := make(chan []provision.Unit, 1) go func() { result := make([]provision.Unit, 0, maxSize) for unit := range units { result = append(result, unit) log.Debugf("result for [container %s] - [app %s]", unit.Name, unit.AppName) } ch <- result }() return ch }
// Creates a new user and write his/her keys into authorized_keys file. // // The authorized_keys file belongs to the user running the process. func New(name string, keys map[string]string) (*User, error) { log.Debugf(`Creating user "%s"`, name) u := &User{Name: name} if v, err := u.isValid(); !v { log.Errorf("user.New: %s", err.Error()) return u, err } if err := db.Session.User().Insert(&u); err != nil { log.Errorf("user.New: %s", err.Error()) return u, err } return u, addKeys(keys, u.Name) }
// remove removes a docker container. func (c *container) remove() error { address := c.getAddress() log.Debugf("Removing container %s from docker", c.ID) err := dockerCluster().RemoveContainer(c.ID) if err != nil { log.Errorf("Failed to remove container from docker: %s", err) } c.removeHost() log.Debugf("Removing container %s from database", c.ID) coll := collection() defer coll.Close() if err := coll.RemoveId(c.ID); err != nil { log.Errorf("Failed to remove container from database: %s", err) } r, err := getRouter() if err != nil { log.Errorf("Failed to obtain router: %s", err) } if err := r.RemoveRoute(c.AppName, address); err != nil { log.Errorf("Failed to remove route: %s", err) } return nil }
// Heal verifies if the bootstrap private dns is different of the bootstrap // private dns setted into agents for each machine. // If the bootstrap private dns is wrong, Heal will injects the correct value. func (h instanceAgentsConfigHealer) Heal() error { conn, err := db.Conn() if err != nil { return err } defer conn.Close() var apps []app.App err = conn.Apps().Find(nil).All(&apps) if err != nil { return err } dns, err := h.bootstrapPrivateDns() if err != nil { return err } for _, app := range apps { for _, u := range app.ProvisionedUnits() { args := []string{"-o", "StrictHostKeyChecking no", "-q", "-l", "ubuntu", u.GetIp(), "grep", dns, "/etc/init/juju-machine-agent.conf"} err := executor().Execute("ssh", args, nil, nil, nil) if err != nil { log.Debugf("Injecting bootstrap private dns for machine %d", u.GetMachine()) args = []string{"-o", "StrictHostKeyChecking no", "-q", "-l", "ubuntu", u.GetIp(), "sudo", "sed", "-i", "'s/env JUJU_ZOOKEEPER=.*/env JUJU_ZOOKEEPER=\"" + dns + ":2181\"/g'", "/etc/init/juju-machine-agent.conf"} executor().Execute("ssh", args, nil, nil, nil) } agent := fmt.Sprintf("/etc/init/juju-%s.conf", strings.Join(strings.Split(u.GetName(), "/"), "-")) args = []string{"-o", "StrictHostKeyChecking no", "-q", "-l", "ubuntu", u.GetIp(), "grep", dns, agent} err = executor().Execute("ssh", args, nil, nil, nil) if err != nil { log.Debugf("Injecting bootstrap private dns for agent %s", agent) args = []string{"-o", "StrictHostKeyChecking no", "-q", "-l", "ubuntu", u.GetIp(), "sudo", "sed", "-i", "'s/env JUJU_ZOOKEEPER=.*/env JUJU_ZOOKEEPER=\"" + dns + ":2181\"/g'", agent} executor().Execute("ssh", args, nil, nil, nil) } } } return nil }
func (p *JujuProvisioner) ExecuteCommand(stdout, stderr io.Writer, app provision.App, cmd string, args ...string) error { units := p.startedUnits(app) log.Debugf("[execute cmd] - provisioned unit %#v", units) length := len(units) for i, unit := range units { if length > 1 { if i > 0 { fmt.Fprintln(stdout) } fmt.Fprintf(stdout, "Output from unit %q:\n\n", unit.GetName()) } err := p.executeCommandViaSSH(stdout, stderr, unit.GetMachine(), cmd, args...) if err != nil { return err } } return nil }
func (h ContainerHealer) Heal() error { containers, err := h.collectContainers() if err != nil { return err } unhealthy := h.unhealthyRunningContainers(containers) for _, c := range unhealthy { log.Debugf("Attempting to heal container %s", c.ID) if err := dockerCluster().StopContainer(c.ID, 10); err != nil { log.Errorf("Caught error while stopping container %s for healing: %s", c.ID, err.Error()) continue } if err := dockerCluster().StartContainer(c.ID, nil); err != nil { log.Errorf("Caught error while starting container %s for healing: %s", c.ID, err.Error()) } } return nil }
// HealTicker execute the registered healers registered by RegisterHealerTicker. func HealTicker(ticker <-chan time.Time) { log.Debug("running heal ticker") var wg sync.WaitGroup for _ = range ticker { healers := getHealers() wg.Add(len(healers)) for name, h := range healers { log.Debugf("running verification/heal for %s", name) go func(healer *healer) { err := healer.heal() if err != nil { log.Debug(err.Error()) } wg.Done() }(h) } wg.Wait() } }
func flatten(imageID string) error { config := docker.Config{ Image: imageID, Cmd: []string{"/bin/bash"}, AttachStdin: false, AttachStdout: false, AttachStderr: false, } _, c, err := dockerCluster().CreateContainer(&config) if err != nil { return err } buf := &bytes.Buffer{} if err := dockerCluster().ExportContainer(c.ID, buf); err != nil { log.Errorf("Flatten: Caugh error while exporting container %s: %s", c.ID, err) return err } // code to debug import issue r := bytes.NewReader(buf.Bytes()) f, _ := os.Create(fmt.Sprintf("/tmp/container-%s", c.ID)) b, err := ioutil.ReadAll(r) if err != nil { log.Debugf("Error while writing debug file: %s", err.Error()) } f.Write(b) f.Close() r.Seek(0, 0) out := &bytes.Buffer{} opts := dcli.ImportImageOptions{Repository: imageID, Source: "-"} if err := dockerCluster().ImportImage(opts, r, out); err != nil { log.Errorf("Flatten: Caugh error while importing image from container %s: %s", c.ID, err) return err } if err := dockerCluster().RemoveContainer(c.ID); err != nil { log.Errorf("Flatten: Caugh error while removing container %s: %s", c.ID, err) } removeFromRegistry(imageID) return nil }
func (c *container) ssh(stdout, stderr io.Writer, cmd string, args ...string) error { ip, _, err := c.networkInfo() if err != nil { return err } stdout = &filter{w: stdout, content: []byte("unable to resolve host")} url := fmt.Sprintf("http://%s:%d/container/%s/cmd", c.HostAddr, sshAgentPort(), ip) input := cmdInput{Cmd: cmd, Args: args} var buf bytes.Buffer err = json.NewEncoder(&buf).Encode(input) if err != nil { return err } log.Debugf("Running SSH on %s:%d: %s %s", c.HostAddr, sshAgentPort(), cmd, args) resp, err := http.Post(url, "application/json", &buf) if err != nil { return err } defer resp.Body.Close() _, err = io.Copy(stdout, resp.Body) return err }
func (p *JujuProvisioner) heal(units []provision.Unit) { var inst instance coll := p.unitsCollection() defer coll.Close() for _, unit := range units { err := coll.FindId(unit.Name).One(&inst) if err != nil { coll.Insert(instance{UnitName: unit.Name, InstanceID: unit.InstanceId}) } else if unit.InstanceId == inst.InstanceID { continue } else { format := "[juju] instance-id of unit %q changed from %q to %q. Healing." log.Debugf(format, unit.Name, inst.InstanceID, unit.InstanceId) if p.elbSupport() { router, err := Router() if err != nil { continue } router.RemoveRoute(unit.AppName, inst.InstanceID) err = router.AddRoute(unit.AppName, unit.InstanceId) if err != nil { format := "[juju] Could not register instance %q in the load balancer: %s." log.Errorf(format, unit.InstanceId, err) continue } } if inst.InstanceID != "pending" { msg := queue.Message{ Action: app.RegenerateApprcAndStart, Args: []string{unit.AppName, unit.Name}, } app.Enqueue(msg) } inst.InstanceID = unit.InstanceId coll.UpdateId(unit.Name, inst) } } }
// Heal iterates through all juju units verifying if // a juju-unit-agent is down and heal these machines. func (h instanceUnitHealer) Heal() error { conn, err := db.Conn() if err != nil { return err } defer conn.Close() var apps []app.App err = conn.Apps().Find(nil).All(&apps) if err != nil { return err } for _, app := range apps { for _, u := range app.ProvisionedUnits() { agent := fmt.Sprintf("juju-%s", strings.Join(strings.Split(u.GetName(), "/"), "-")) if u.GetStatus() == provision.StatusDown { log.Debugf("Healing %s", agent) upStartCmd("stop", agent, u.GetIp()) upStartCmd("start", agent, u.GetIp()) } } } return nil }
"github.com/globocom/tsuru/db"
"github.com/globocom/tsuru/log"
"github.com/globocom/tsuru/provision"
"github.com/globocom/tsuru/queue"
"labix.org/v2/mgo/bson"
"net"
"strings"
)

// createContainer is the pipeline action that creates the container of an
// app.
//
// Forward expects three parameters: the app (provision.App), the image id
// (string) and the commands ([]string). It calls newContainer with them and
// returns the created container as the action result, or the error reported
// by newContainer (which is also logged).
//
// Backward undoes the creation: it takes the container from the forward
// result, asks the docker cluster to remove it and deletes the document whose
// "id" matches the container ID from the collection. Errors from both
// removals are ignored.
var createContainer = action.Action{
	Name: "create-container",
	Forward: func(ctx action.FWContext) (action.Result, error) {
		// The type assertions panic when the parameters have other types.
		app := ctx.Params[0].(provision.App)
		imageId := ctx.Params[1].(string)
		cmds := ctx.Params[2].([]string)
		log.Debugf("create container for app %s, based on image %s, with cmds %s", app.GetName(), imageId, cmds)
		cont, err := newContainer(app, imageId, cmds)
		if err != nil {
			log.Errorf("error on create container for app %s - %s", app.GetName(), err)
			return nil, err
		}
		return cont, nil
	},
	Backward: func(ctx action.BWContext) {
		// NOTE(review): this asserts a container value, not a pointer;
		// confirm that it matches what newContainer returns.
		c := ctx.FWResult.(container)
		dockerCluster().RemoveContainer(docker.RemoveContainerOptions{ID: c.ID})
		coll := collection()
		defer coll.Close()
		coll.Remove(bson.M{"id": c.ID})
	},
}