func (c *context) watchFormations(events chan<- *FormationEvent, hostEvents chan<- *host.Event) {
	g := grohl.NewContext(grohl.Data{"fn": "watchFormations"})

	c.syncCluster(hostEvents)
	if events != nil {
		events <- &FormationEvent{}
	}

	var attempts int
	var lastUpdatedAt time.Time
	for {
		// wait a second if we've tried more than once
		attempts++
		if attempts > 1 {
			time.Sleep(time.Second)
		}
		g.Log(grohl.Data{"at": "connect", "attempt": attempts})

		updates, err := c.StreamFormations(&lastUpdatedAt)

		for ef := range updates.Chan {
			// we are now connected so reset attempts
			attempts = 0

			if ef.App == nil {
				// sentinel
				continue
			}
			lastUpdatedAt = ef.UpdatedAt

			f := c.formations.Get(ef.App.ID, ef.Release.ID)
			if f != nil {
				g.Log(grohl.Data{"app.id": ef.App.ID, "release.id": ef.Release.ID, "at": "update"})
				f.SetProcesses(ef.Processes)
			} else {
				g.Log(grohl.Data{"app.id": ef.App.ID, "release.id": ef.Release.ID, "at": "new"})
				f = NewFormation(c, ef)
				c.formations.Add(f)
			}

			// check for omnipresence
			for _, proctype := range f.Release.Processes {
				if proctype.Omni {
					c.omniMtx.Lock()
					c.omni[f] = struct{}{}
					c.omniMtx.Unlock()
					break
				}
			}

			go func() {
				f.Rectify()
				if events != nil {
					events <- &FormationEvent{Formation: f}
				}
			}()
		}

		if *err != nil {
			g.Log(grohl.Data{"at": "error", "error": *err})
		}
		g.Log(grohl.Data{"at": "disconnect"})

		updates.Close()
	}
}
func (f *Formation) remove(n int, name string, hostID string) {
	g := grohl.NewContext(grohl.Data{"fn": "remove", "app.id": f.AppID, "release.id": f.Release.ID})

	i := 0
	sj := make(sortJobs, 0, len(f.jobs[name]))
	for _, job := range f.jobs[name] {
		sj = append(sj, job)
	}
	sj.Sort()
	for _, job := range sj {
		g.Log(grohl.Data{"host.id": job.HostID, "job.id": job.ID})
		if hostID != "" && job.HostID != hostID { // remove from a specific host
			continue
		}
		// TODO: robust host handling
		if err := f.c.hosts.Get(job.HostID).StopJob(job.ID); err != nil {
			g.Log(grohl.Data{"at": "error", "err": err.Error()})
			// TODO: handle error
		}
		f.jobs.Remove(job)
		if i++; i == n {
			break
		}
	}
}
func (c *libvirtContainer) cleanup() error {
	g := grohl.NewContext(grohl.Data{"backend": "libvirt-lxc", "fn": "cleanup", "job.id": c.job.ID})
	g.Log(grohl.Data{"at": "start"})

	if err := syscall.Unmount(filepath.Join(c.RootPath, ".containerinit"), 0); err != nil {
		g.Log(grohl.Data{"at": "unmount", "file": ".containerinit", "status": "error", "err": err})
	}
	if err := syscall.Unmount(filepath.Join(c.RootPath, "etc/resolv.conf"), 0); err != nil {
		g.Log(grohl.Data{"at": "unmount", "file": "resolv.conf", "status": "error", "err": err})
	}
	if err := pinkerton.Cleanup(c.job.ID); err != nil {
		g.Log(grohl.Data{"at": "pinkerton", "status": "error", "err": err})
	}
	for _, m := range c.job.Config.Mounts {
		if err := syscall.Unmount(filepath.Join(c.RootPath, m.Location), 0); err != nil {
			g.Log(grohl.Data{"at": "unmount", "location": m.Location, "status": "error", "err": err})
		}
	}
	for _, p := range c.job.Config.Ports {
		if err := c.l.forwarder.Remove(&net.TCPAddr{IP: c.IP, Port: p.Port}, p.RangeEnd, p.Proto); err != nil {
			g.Log(grohl.Data{"at": "iptables", "status": "error", "err": err, "port": p.Port})
		}
		c.l.ports[p.Proto].Put(uint16(p.Port))
	}
	ipallocator.ReleaseIP(defaultNet, &c.IP)
	g.Log(grohl.Data{"at": "finish"})
	return nil
}
func (m *LogMux) drainTo(w io.Writer) {
	defer close(m.donec)

	g := grohl.NewContext(grohl.Data{"at": "logmux_drain"})

	for {
		msg, ok := <-m.logc
		if !ok {
			return // shutdown
		}

		_, err := w.Write(rfc6587.Bytes(msg))
		if err != nil {
			g.Log(grohl.Data{"status": "error", "err": err.Error()})

			// write logs to local logger when the writer fails
			g.Log(grohl.Data{"msg": msg.String()})
			for msg := range m.logc {
				g.Log(grohl.Data{"msg": msg.String()})
			}

			return // shutdown
		}
	}
}
func (c *libvirtContainer) cleanup() error {
	g := grohl.NewContext(grohl.Data{"backend": "libvirt-lxc", "fn": "cleanup", "job.id": c.job.ID})
	g.Log(grohl.Data{"at": "start"})

	if err := syscall.Unmount(filepath.Join(c.RootPath, ".containerinit"), 0); err != nil {
		g.Log(grohl.Data{"at": "unmount", "file": ".containerinit", "status": "error", "err": err})
	}
	if err := syscall.Unmount(filepath.Join(c.RootPath, "etc/resolv.conf"), 0); err != nil {
		g.Log(grohl.Data{"at": "unmount", "file": "resolv.conf", "status": "error", "err": err})
	}
	if err := c.l.pinkerton.Cleanup(c.job.ID); err != nil {
		g.Log(grohl.Data{"at": "pinkerton", "status": "error", "err": err})
	}
	for _, m := range c.job.Config.Mounts {
		if err := syscall.Unmount(filepath.Join(c.RootPath, m.Location), 0); err != nil {
			g.Log(grohl.Data{"at": "unmount", "location": m.Location, "status": "error", "err": err})
		}
	}
	for _, v := range c.job.Config.Volumes {
		if err := syscall.Unmount(filepath.Join(c.RootPath, v.Target), 0); err != nil {
			g.Log(grohl.Data{"at": "unmount", "target": v.Target, "volumeID": v.VolumeID, "status": "error", "err": err})
		}
	}
	if !c.job.Config.HostNetwork && c.l.bridgeNet != nil {
		c.l.ipalloc.ReleaseIP(c.l.bridgeNet, c.IP)
	}
	g.Log(grohl.Data{"at": "finish"})
	return nil
}
func (l *LibvirtLXCBackend) Cleanup() error {
	g := grohl.NewContext(grohl.Data{"backend": "libvirt-lxc", "fn": "Cleanup"})
	l.containersMtx.Lock()
	ids := make([]string, 0, len(l.containers))
	for id := range l.containers {
		ids = append(ids, id)
	}
	l.containersMtx.Unlock()
	g.Log(grohl.Data{"at": "start", "count": len(ids)})
	errs := make(chan error)
	for _, id := range ids {
		go func(id string) {
			g.Log(grohl.Data{"at": "stop", "job.id": id})
			err := l.Stop(id)
			if err != nil {
				g.Log(grohl.Data{"at": "error", "job.id": id, "err": err.Error()})
			}
			errs <- err
		}(id)
	}
	var err error
	for i := 0; i < len(ids); i++ {
		stopErr := <-errs
		if stopErr != nil {
			err = stopErr
		}
	}
	g.Log(grohl.Data{"at": "finish"})
	return err
}
// waitExit waits for the libvirt domain to be marked as done or five seconds to
// elapse
func (c *libvirtContainer) waitExit() {
	g := grohl.NewContext(grohl.Data{"backend": "libvirt-lxc", "fn": "waitExit", "job.id": c.job.ID})
	g.Log(grohl.Data{"at": "start"})
	domain, err := c.l.libvirt.LookupDomainByName(c.job.ID)
	if err != nil {
		g.Log(grohl.Data{"at": "domain_error", "err": err.Error()})
		return
	}
	defer domain.Free()
	maxWait := time.After(5 * time.Second)
	for {
		state, err := domain.GetState()
		if err != nil {
			g.Log(grohl.Data{"at": "state_error", "err": err.Error()})
			return
		}
		if state[0] != libvirt.VIR_DOMAIN_RUNNING && state[0] != libvirt.VIR_DOMAIN_SHUTDOWN {
			g.Log(grohl.Data{"at": "done"})
			return
		}
		select {
		case <-maxWait:
			g.Log(grohl.Data{"at": "maxWait"})
			return
		default:
			time.Sleep(100 * time.Millisecond)
		}
	}
}
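// Illustrative sketch (not part of the original source): waitExit above combines
// a hard deadline with short sleeps between state checks. The same pattern can be
// written generically with the standard "time" package; pollUntil and its
// parameters are hypothetical names used only for illustration.
func pollUntil(done func() bool, timeout, interval time.Duration) bool {
	deadline := time.After(timeout)
	for {
		if done() {
			return true // condition met before the deadline
		}
		select {
		case <-deadline:
			return false // gave up after the timeout elapsed
		default:
			time.Sleep(interval)
		}
	}
}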
func (h *attachHandler) attach(req *host.AttachReq, conn io.ReadWriteCloser) {
	defer conn.Close()

	g := grohl.NewContext(grohl.Data{"fn": "attach", "job.id": req.JobID})
	g.Log(grohl.Data{"at": "start"})
	attachWait := make(chan struct{})
	job := h.state.AddAttacher(req.JobID, attachWait)
	if job == nil {
		defer h.state.RemoveAttacher(req.JobID, attachWait)
		if _, err := conn.Write([]byte{host.AttachWaiting}); err != nil {
			return
		}
		// TODO: add timeout
		<-attachWait
		job = h.state.GetJob(req.JobID)
	}

	success := make(chan struct{})
	failed := make(chan struct{})
	opts := &AttachRequest{
		Job:        job,
		Logs:       req.Flags&host.AttachFlagLogs != 0,
		Stream:     req.Flags&host.AttachFlagStream != 0,
		Height:     req.Height,
		Width:      req.Width,
		Attached:   success,
		ReadWriter: conn,
		Streams:    make([]string, 0, 3),
	}
	if req.Flags&host.AttachFlagStdin != 0 {
		opts.Streams = append(opts.Streams, "stdin")
	}
	if req.Flags&host.AttachFlagStdout != 0 {
		opts.Streams = append(opts.Streams, "stdout")
	}
	if req.Flags&host.AttachFlagStderr != 0 {
		opts.Streams = append(opts.Streams, "stderr")
	}

	go func() {
		select {
		case <-success:
			conn.Write([]byte{host.AttachSuccess})
			close(success)
		case <-failed:
		}
		close(attachWait)
	}()

	if err := h.backend.Attach(opts); err != nil {
		select {
		case <-success:
		default:
			close(failed)
			conn.Write(append([]byte{host.AttachError}, err.Error()...))
		}
		g.Log(grohl.Data{"status": "error", "err": err})
		return
	}
	g.Log(grohl.Data{"at": "finish"})
}
func (c *context) watchFormations() {
	g := grohl.NewContext(grohl.Data{"fn": "watchFormations"})

	c.syncCluster()

	var attempts int
	var lastUpdatedAt time.Time
	for {
		// wait a second if we've tried more than once
		attempts++
		if attempts > 1 {
			time.Sleep(time.Second)
		}
		g.Log(grohl.Data{"at": "connect", "attempt": attempts})

		updates := make(chan *ct.ExpandedFormation)
		streamCtrl, err := c.StreamFormations(&lastUpdatedAt, updates)
		if err != nil {
			g.Log(grohl.Data{"at": "error", "error": err})
			continue
		}

		for ef := range updates {
			// we are now connected so reset attempts
			attempts = 0

			if ef.App == nil {
				// sentinel
				continue
			}
			lastUpdatedAt = ef.UpdatedAt

			f := c.formations.Get(ef.App.ID, ef.Release.ID)
			if f != nil {
				g.Log(grohl.Data{"app.id": ef.App.ID, "release.id": ef.Release.ID, "at": "update"})
				f.SetProcesses(ef.Processes)
			} else {
				g.Log(grohl.Data{"app.id": ef.App.ID, "release.id": ef.Release.ID, "at": "new"})
				f = NewFormation(c, ef)
				c.formations.Add(f)
			}

			// check for omnipresence
			for _, proctype := range f.Release.Processes {
				if proctype.Omni {
					c.omniMtx.Lock()
					c.omni[f] = struct{}{}
					c.omniMtx.Unlock()
					break
				}
			}

			go f.Rectify()
		}

		if streamCtrl.Err() != nil {
			g.Log(grohl.Data{"at": "disconnect", "err": streamCtrl.Err()})
		}
		g.Log(grohl.Data{"at": "disconnect"})
	}
}
func (f *Formation) add(n int, name string, hostID string) {
	g := grohl.NewContext(grohl.Data{"fn": "add", "app.id": f.AppID, "release.id": f.Release.ID})
	for i := 0; i < n; i++ {
		job, err := f.start(name, hostID)
		if err != nil {
			// TODO: log/handle error
			continue
		}
		g.Log(grohl.Data{"host.id": job.HostID, "job.id": job.ID})
	}
}
func (l *LibvirtLXCBackend) OpenLogs(buffers host.LogBuffers) error {
	l.containersMtx.RLock()
	defer l.containersMtx.RUnlock()
	for id, c := range l.containers {
		g := grohl.NewContext(grohl.Data{"backend": "libvirt-lxc", "fn": "OpenLogs", "job.id": id})
		if err := c.followLogs(g, buffers[id]); err != nil {
			return err
		}
	}
	return nil
}
func (f *Formation) rectify() {
	g := grohl.NewContext(grohl.Data{"fn": "rectify", "app.id": f.AppID, "release.id": f.Release.ID})

	var hosts []*cluster.Host
	if _, ok := f.c.omni[f]; ok {
		hosts = f.c.hosts.List()
	}

	// update job counts
	for t, expected := range f.Processes {
		if f.Release.Processes[t].Omni {
			// get job counts per host
			hostCounts := make(map[string]int, len(hosts))
			for _, h := range hosts {
				hostCounts[h.ID()] = 0
			}
			for k := range f.jobs[t] {
				hostCounts[k.hostID]++
			}

			// update per host
			for hostID, actual := range hostCounts {
				diff := expected - actual
				g.Log(grohl.Data{"at": "update", "type": t, "expected": expected, "actual": actual, "diff": diff})
				if diff > 0 {
					f.add(diff, t, hostID)
				} else if diff < 0 {
					f.remove(-diff, t, hostID)
				}
			}
		} else {
			actual := len(f.jobs[t])
			diff := expected - actual
			g.Log(grohl.Data{"at": "update", "type": t, "expected": expected, "actual": actual, "diff": diff})
			if diff > 0 {
				f.add(diff, t, "")
			} else if diff < 0 {
				f.remove(-diff, t, "")
			}
		}
	}

	// remove process types
	for t, jobs := range f.jobs {
		// ignore one-off jobs which have no type
		if t == "" {
			continue
		}
		if _, exists := f.Processes[t]; !exists {
			g.Log(grohl.Data{"at": "cleanup", "type": t, "count": len(jobs)})
			f.remove(len(jobs), t, "")
		}
	}
}
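// Illustrative sketch (not part of the original source): rectify converges the
// cluster on the desired process counts by computing expected-actual diffs and
// starting or stopping that many jobs. jobDiff is a hypothetical helper showing
// the sign convention used above: a positive diff means add, a negative one
// means remove.
func jobDiff(expected, actual int) (add, remove int) {
	diff := expected - actual
	if diff > 0 {
		return diff, 0
	}
	return 0, -diff
}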
func (c *libvirtContainer) cleanup() error {
	g := grohl.NewContext(grohl.Data{"backend": "libvirt-lxc", "fn": "cleanup", "job.id": c.job.ID})
	g.Log(grohl.Data{"at": "start"})

	c.unbindMounts()
	if err := c.l.pinkerton.Cleanup(c.job.ID); err != nil {
		g.Log(grohl.Data{"at": "pinkerton", "status": "error", "err": err})
	}
	if !c.job.Config.HostNetwork && c.l.bridgeNet != nil {
		c.l.ipalloc.ReleaseIP(c.l.bridgeNet, c.IP)
	}
	g.Log(grohl.Data{"at": "finish"})
	return nil
}
func (l *LibvirtLXCBackend) CloseLogs() (host.LogBuffers, error) {
	g := grohl.NewContext(grohl.Data{"backend": "libvirt-lxc", "fn": "CloseLogs"})
	l.logStreamMtx.Lock()
	defer l.logStreamMtx.Unlock()
	buffers := make(host.LogBuffers, len(l.logStreams))
	for id, streams := range l.logStreams {
		g.Log(grohl.Data{"job.id": id})
		buffer := make(host.LogBuffer, len(streams))
		for fd, stream := range streams {
			buffer[fd] = stream.Close()
		}
		buffers[id] = buffer
		delete(l.logStreams, id)
	}
	return buffers, nil
}
func (f *Formation) restart(stoppedJob *Job) error {
	g := grohl.NewContext(grohl.Data{"fn": "restart", "app.id": f.AppID, "release.id": f.Release.ID})
	g.Log(grohl.Data{"old.host.id": stoppedJob.HostID, "old.job.id": stoppedJob.ID})

	f.jobs.Remove(stoppedJob)

	var hostID string
	if f.Release.Processes[stoppedJob.Type].Omni {
		hostID = stoppedJob.HostID
	}
	newJob, err := f.start(stoppedJob.Type, hostID)
	if err != nil {
		return err
	}
	newJob.restarts = stoppedJob.restarts + 1
	g.Log(grohl.Data{"new.host.id": newJob.HostID, "new.job.id": newJob.ID})
	return nil
}
func (f *Formation) remove(n int, name string, hostID string) {
	g := grohl.NewContext(grohl.Data{"fn": "remove", "app.id": f.AppID, "release.id": f.Release.ID})

	i := 0
	for _, job := range f.jobs[name] {
		g.Log(grohl.Data{"host.id": job.HostID, "job.id": job.ID})
		if hostID != "" && job.HostID != hostID { // remove from a specific host
			continue
		}
		// TODO: robust host handling
		if err := f.c.hosts.Get(job.HostID).StopJob(job.ID); err != nil {
			// TODO: log/handle error
		}
		f.jobs.Remove(job)
		if i++; i == n {
			break
		}
	}
}
func (m *LogMux) follow(r io.Reader, hdr *rfc5424.Header) {
	defer m.producerwg.Done()

	g := grohl.NewContext(grohl.Data{"at": "logmux_follow"})

	s := bufio.NewScanner(r)
	for s.Scan() {
		msg := rfc5424.NewMessage(hdr, s.Bytes())

		select {
		case m.logc <- msg:
		default:
			// throw away msg if logc buffer is full
		}
	}

	if s.Err() != nil {
		g.Log(grohl.Data{"status": "error", "err": s.Err()})
	}
}
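// Illustrative sketch (not part of the original source): follow drops log
// messages rather than blocking the producer when the mux buffer is full. The
// select-with-default idiom it relies on looks like this; trySend is a
// hypothetical helper, not part of the LogMux API.
func trySend(ch chan<- string, msg string) bool {
	select {
	case ch <- msg:
		return true // delivered
	default:
		return false // buffer full, message dropped
	}
}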
func (c *libvirtContainer) unbindMounts() {
	g := grohl.NewContext(grohl.Data{"backend": "libvirt-lxc", "fn": "unbind_mounts", "job.id": c.job.ID})
	g.Log(grohl.Data{"at": "start"})

	if err := syscall.Unmount(filepath.Join(c.RootPath, ".containerinit"), 0); err != nil {
		g.Log(grohl.Data{"at": "unmount", "file": ".containerinit", "status": "error", "err": err})
	}
	if err := syscall.Unmount(filepath.Join(c.RootPath, "etc/resolv.conf"), 0); err != nil {
		g.Log(grohl.Data{"at": "unmount", "file": "resolv.conf", "status": "error", "err": err})
	}
	for _, m := range c.job.Config.Mounts {
		if err := syscall.Unmount(filepath.Join(c.RootPath, m.Location), 0); err != nil {
			g.Log(grohl.Data{"at": "unmount", "location": m.Location, "status": "error", "err": err})
		}
	}
	for _, v := range c.job.Config.Volumes {
		if err := syscall.Unmount(filepath.Join(c.RootPath, v.Target), 0); err != nil {
			g.Log(grohl.Data{"at": "unmount", "target": v.Target, "volumeID": v.VolumeID, "status": "error", "err": err})
		}
	}
	g.Log(grohl.Data{"at": "finish"})
}
func (d *DockerBackend) Cleanup() error {
	g := grohl.NewContext(grohl.Data{"backend": "docker", "fn": "cleanup"})
	g.Log(grohl.Data{"at": "start"})
	containers, err := d.docker.ListContainers(docker.ListContainersOptions{})
	if err != nil {
		g.Log(grohl.Data{"at": "list", "status": "error", "err": err})
		return err
	}
outer:
	for _, c := range containers {
		for _, name := range c.Names {
			if strings.HasPrefix(name, "/flynn-") {
				g.Log(grohl.Data{"at": "kill", "container.id": c.ID, "container.name": name})
				if err := d.docker.KillContainer(docker.KillContainerOptions{ID: c.ID}); err != nil {
					g.Log(grohl.Data{"at": "kill", "container.id": c.ID, "container.name": name, "status": "error", "err": err})
				}
				continue outer
			}
		}
	}
	g.Log(grohl.Data{"at": "finish"})
	return nil
}
func (c *context) syncJobStates() error {
	g := grohl.NewContext(grohl.Data{"fn": "syncJobStates"})
	g.Log(grohl.Data{"at": "appList"})

	apps, err := c.AppList()
	if err != nil {
		g.Log(grohl.Data{"at": "appList", "status": "error", "err": err})
		return err
	}

	for _, app := range apps {
		g.Log(grohl.Data{"at": "jobList", "app.id": app.ID})
		jobs, err := c.JobList(app.ID)
		if err != nil {
			g.Log(grohl.Data{"at": "jobList", "app.id": app.ID, "status": "error", "err": err})
			continue
		}

		for _, job := range jobs {
			gg := g.New(grohl.Data{"job.id": job.ID, "app.id": app.ID, "state": job.State})
			gg.Log(grohl.Data{"at": "checkState"})
			if job.State != "up" {
				continue
			}

			hostID, err := cluster.ExtractHostID(job.ID)
			if err != nil {
				gg.Log(grohl.Data{"at": "jobHostID", "status": "error", "err": err})
				continue
			}
			if j := c.jobs.Get(hostID, job.ID); j != nil {
				continue
			}

			job.State = "down"
			gg.Log(grohl.Data{"at": "putJob", "state": "down"})
			go c.PutJob(job)
		}
	}

	return nil
}
func (m *manifestRunner) runManifest(r io.Reader) (map[string]*ManifestData, error) {
	g := grohl.NewContext(grohl.Data{"fn": "run_manifest"})
	var services []*manifestService
	if err := json.NewDecoder(r).Decode(&services); err != nil {
		return nil, err
	}

	serviceData := make(map[string]*ManifestData, len(services))

	m.state.mtx.Lock()
	for _, job := range m.state.jobs {
		if job.ManifestID == "" || job.Status != host.StatusRunning {
			continue
		}
		var service *manifestService
		for _, service = range services {
			if service.ID == job.ManifestID {
				break
			}
		}
		if service == nil {
			continue
		}
		g.Log(grohl.Data{"at": "restore", "service": service.ID, "job.id": job.Job.ID})
		data := &ManifestData{
			ExternalIP: m.externalAddr,
			InternalIP: job.InternalIP,
			Env:        job.Job.Config.Env,
			Services:   serviceData,
			ports:      m.ports["tcp"],
			readonly:   true,
		}
		data.TCPPorts = make([]int, 0, len(job.Job.Config.Ports))
		for _, p := range job.Job.Config.Ports {
			if p.Proto != "tcp" {
				continue
			}
			data.TCPPorts = append(data.TCPPorts, p.Port)
		}
		serviceData[service.ID] = data
	}
	m.state.mtx.Unlock()

	for _, service := range services {
		if _, exists := serviceData[service.ID]; exists {
			continue
		}
		data := &ManifestData{
			Env:        parseEnviron(),
			Services:   serviceData,
			ExternalIP: m.externalAddr,
			ports:      m.ports["tcp"],
		}

		// Add explicit tcp ports to data.TCPPorts
		for _, port := range service.TCPPorts {
			port, err := strconv.Atoi(port)
			if err != nil {
				return nil, err
			}
			data.TCPPorts = append(data.TCPPorts, port)
		}

		var buf bytes.Buffer
		interp := func(s string) (string, error) {
			t, err := template.New("arg").Parse(s)
			if err != nil {
				return "", err
			}
			if err := t.Execute(&buf, data); err != nil {
				return "", err
			}
			defer buf.Reset()
			return buf.String(), nil
		}

		args := make([]string, 0, len(service.Args))
		for _, arg := range service.Args {
			arg, err := interp(arg)
			if err != nil {
				return nil, err
			}
			if strings.TrimSpace(arg) == "" {
				continue
			}
			args = append(args, arg)
		}
		var err error
		for k, v := range service.Env {
			service.Env[k], err = interp(v)
			if err != nil {
				return nil, err
			}
		}
		data.Env = service.Env

		if service.Image == "" {
			service.Image = "https://registry.hub.docker.com/flynn/" + service.ID
		}
		if service.ImageID != "" {
			service.Image += "?id=" + service.ImageID
		}

		job := &host.Job{
			ID: cluster.RandomJobID("flynn-" + service.ID + "-"),
			Artifact: host.Artifact{
				Type: "docker",
				URI:  service.Image,
			},
			Config: host.ContainerConfig{
				Entrypoint: service.Entrypoint,
				Cmd:        args,
				Env:        data.Env,
			},
		}
		if job.Config.Env == nil {
			job.Config.Env = make(map[string]string)
		}
		job.Config.Env["EXTERNAL_IP"] = m.externalAddr
		job.Config.Ports = make([]host.Port, len(data.TCPPorts))
		for i, port := range data.TCPPorts {
			job.Config.Ports[i] = host.Port{Proto: "tcp", Port: port}
		}
		if len(job.Config.Ports) == 0 {
			job.Config.Ports = []host.Port{{Proto: "tcp"}}
		}

		if err := m.backend.Run(job); err != nil {
			return nil, err
		}
		m.state.SetManifestID(job.ID, service.ID)
		activeJob := m.state.GetJob(job.ID)
		data.InternalIP = activeJob.InternalIP
		data.readonly = true
		serviceData[service.ID] = data
	}

	return serviceData, nil
}
func runDaemon(args *docopt.Args) {
	hostname, _ := os.Hostname()
	externalIP := args.String["--external-ip"]
	stateFile := args.String["--state"]
	hostID := args.String["--id"]
	force := args.Bool["--force"]
	volPath := args.String["--volpath"]
	backendName := args.String["--backend"]
	flynnInit := args.String["--flynn-init"]
	nsumount := args.String["--nsumount"]
	logDir := args.String["--log-dir"]
	discoveryToken := args.String["--discovery"]

	var peerIPs []string
	if args.String["--peer-ips"] != "" {
		peerIPs = strings.Split(args.String["--peer-ips"], ",")
	}

	grohl.AddContext("app", "host")
	grohl.Log(grohl.Data{"at": "start"})
	g := grohl.NewContext(grohl.Data{"fn": "main"})

	if hostID == "" {
		hostID = strings.Replace(hostname, "-", "", -1)
	}
	if strings.Contains(hostID, "-") {
		shutdown.Fatal("host id must not contain dashes")
	}
	if externalIP == "" {
		var err error
		externalIP, err = config.DefaultExternalIP()
		if err != nil {
			shutdown.Fatal(err)
		}
	}

	publishAddr := net.JoinHostPort(externalIP, "1113")
	if discoveryToken != "" {
		// TODO: retry
		discoveryID, err := discovery.RegisterInstance(discovery.Info{
			ClusterURL:  discoveryToken,
			InstanceURL: "http://" + publishAddr,
			Name:        hostID,
		})
		if err != nil {
			g.Log(grohl.Data{"at": "register_discovery", "status": "error", "err": err.Error()})
			shutdown.Fatal(err)
		}
		g.Log(grohl.Data{"at": "register_discovery", "id": discoveryID})
	}

	state := NewState(hostID, stateFile)
	var backend Backend
	var err error

	// create volume manager
	vman, err := volumemanager.New(
		filepath.Join(volPath, "volumes.bolt"),
		func() (volume.Provider, error) {
			// use a zpool backing file size of either 70% of the device on which
			// volumes will reside, or 100GB if that can't be determined
			// (see the sizing sketch after this function).
			var size int64
			var dev syscall.Statfs_t
			if err := syscall.Statfs(volPath, &dev); err == nil {
				size = (dev.Bsize * int64(dev.Blocks) * 7) / 10
			} else {
				size = 100000000000
			}
			g.Log(grohl.Data{"at": "zpool_size", "size": size})

			return zfsVolume.NewProvider(&zfsVolume.ProviderConfig{
				DatasetName: "flynn-default",
				Make: &zfsVolume.MakeDev{
					BackingFilename: filepath.Join(volPath, "zfs/vdev/flynn-default-zpool.vdev"),
					Size:            size,
				},
				WorkingDir: filepath.Join(volPath, "zfs"),
			})
		},
	)
	if err != nil {
		shutdown.Fatal(err)
	}

	mux := logmux.New(1000)
	shutdown.BeforeExit(func() { mux.Close() })

	switch backendName {
	case "libvirt-lxc":
		backend, err = NewLibvirtLXCBackend(state, vman, logDir, flynnInit, nsumount, mux)
	default:
		log.Fatalf("unknown backend %q", backendName)
	}
	if err != nil {
		shutdown.Fatal(err)
	}
	backend.SetDefaultEnv("EXTERNAL_IP", externalIP)

	discoverdManager := NewDiscoverdManager(backend, mux, hostID, publishAddr)
	publishURL := "http://" + publishAddr
	host := &Host{
		id:      hostID,
		url:     publishURL,
		state:   state,
		backend: backend,
		status:  &host.HostStatus{ID: hostID, URL: publishURL},
	}

	// stopJobs stops all jobs, leaving discoverd until the end so other
	// jobs can unregister themselves on shutdown.
	stopJobs := func() (err error) {
		var except []string
		host.statusMtx.RLock()
		if host.status.Discoverd != nil && host.status.Discoverd.JobID != "" {
			except = []string{host.status.Discoverd.JobID}
		}
		host.statusMtx.RUnlock()
		if err := backend.Cleanup(except); err != nil {
			return err
		}
		for _, id := range except {
			if e := backend.Stop(id); e != nil {
				err = e
			}
		}
		return
	}

	resurrect, err := state.Restore(backend)
	if err != nil {
		shutdown.Fatal(err)
	}
	shutdown.BeforeExit(func() {
		// close discoverd before stopping jobs so we can unregister first
		discoverdManager.Close()
		stopJobs()
	})
	shutdown.BeforeExit(func() {
		if err := state.MarkForResurrection(); err != nil {
			log.Print("error marking for resurrection", err)
		}
	})

	if err := serveHTTP(
		host,
		&attachHandler{state: state, backend: backend},
		cluster.NewClient(),
		vman,
		discoverdManager.ConnectLocal,
	); err != nil {
		shutdown.Fatal(err)
	}

	if force {
		if err := stopJobs(); err != nil {
			shutdown.Fatal(err)
		}
	}

	if discoveryToken != "" {
		instances, err := discovery.GetCluster(discoveryToken)
		if err != nil {
			// TODO(titanous): retry?
			shutdown.Fatal(err)
		}
		peerIPs = make([]string, 0, len(instances))
		for _, inst := range instances {
			u, err := url.Parse(inst.URL)
			if err != nil {
				continue
			}
			ip, _, err := net.SplitHostPort(u.Host)
			if err != nil || ip == externalIP {
				continue
			}
			peerIPs = append(peerIPs, ip)
		}
	}

	if err := discoverdManager.ConnectPeer(peerIPs); err != nil {
		// No peers have working discoverd, so resurrect any available jobs
		resurrect()
	}

	<-make(chan struct{})
}
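// Illustrative sketch (not part of the original source): runDaemon sizes the
// default zpool backing file at 70% of the filesystem holding volPath, falling
// back to 100GB when Statfs fails. zpoolBackingSize is a hypothetical helper
// isolating that arithmetic; it assumes the "syscall" package is imported.
func zpoolBackingSize(volPath string) int64 {
	var dev syscall.Statfs_t
	if err := syscall.Statfs(volPath, &dev); err != nil {
		return 100000000000 // fallback: 100GB
	}
	// 70% of the device size (block size * block count)
	return (dev.Bsize * int64(dev.Blocks) * 7) / 10
}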
func (c *libvirtContainer) watch(ready chan<- error) error {
	g := grohl.NewContext(grohl.Data{"backend": "libvirt-lxc", "fn": "watch_container", "job.id": c.job.ID})
	g.Log(grohl.Data{"at": "start"})

	defer func() {
		// TODO: kill containerinit/domain if it is still running
		c.l.containersMtx.Lock()
		delete(c.l.containers, c.job.ID)
		c.l.containersMtx.Unlock()
		c.cleanup()
		close(c.done)
	}()

	var symlinked bool
	var err error
	symlink := "/tmp/containerinit-rpc." + c.job.ID
	socketPath := path.Join(c.RootPath, containerinit.SocketPath)
	for startTime := time.Now(); time.Since(startTime) < 10*time.Second; time.Sleep(time.Millisecond) {
		if !symlinked {
			// We can't connect to the socket file directly because
			// the path to it is longer than 108 characters (UNIX_PATH_MAX).
			// Create a temporary symlink to connect to.
			if err = os.Symlink(socketPath, symlink); err != nil && !os.IsExist(err) {
				g.Log(grohl.Data{"at": "symlink_socket", "status": "error", "err": err, "source": socketPath, "target": symlink})
				continue
			}
			defer os.Remove(symlink)
			symlinked = true
		}

		c.Client, err = containerinit.NewClient(symlink)
		if err == nil {
			break
		}
	}
	if ready != nil {
		ready <- err
	}
	if err != nil {
		g.Log(grohl.Data{"at": "connect", "status": "error", "err": err.Error()})
		c.l.state.SetStatusFailed(c.job.ID, errors.New("failed to connect to container"))

		d, e := c.l.libvirt.LookupDomainByName(c.job.ID)
		if e != nil {
			return e
		}
		if err := d.Destroy(); err != nil {
			g.Log(grohl.Data{"at": "destroy", "status": "error", "err": err.Error()})
		}
		return err
	}
	defer c.Client.Close()

	c.l.containersMtx.Lock()
	c.l.containers[c.job.ID] = c
	c.l.containersMtx.Unlock()

	if !c.job.Config.DisableLog && !c.job.Config.TTY {
		g.Log(grohl.Data{"at": "get_stdout"})
		stdout, stderr, initLog, err := c.Client.GetStreams()
		if err != nil {
			g.Log(grohl.Data{"at": "get_streams", "status": "error", "err": err.Error()})
			return err
		}

		log := c.l.openLog(c.job.ID)
		defer log.Close()

		muxConfig := logmux.Config{
			AppID:   c.job.Metadata["flynn-controller.app"],
			HostID:  c.l.state.id,
			JobType: c.job.Metadata["flynn-controller.type"],
			JobID:   c.job.ID,
		}

		// TODO(benburkert): remove file logging once attach proto uses logaggregator
		streams := []io.Reader{stdout, stderr}
		for i, stream := range streams {
			bufr, bufw := io.Pipe()
			muxr, muxw := io.Pipe()
			go func(r io.Reader, pw1, pw2 *io.PipeWriter) {
				mw := io.MultiWriter(pw1, pw2)
				_, err := io.Copy(mw, r)
				pw1.CloseWithError(err)
				pw2.CloseWithError(err)
			}(stream, bufw, muxw)

			fd := i + 1
			go log.Follow(fd, bufr)
			go c.l.mux.Follow(muxr, fd, muxConfig)
		}

		go log.Follow(3, initLog)
	}

	g.Log(grohl.Data{"at": "watch_changes"})
	for change := range c.Client.StreamState() {
		g.Log(grohl.Data{"at": "change", "state": change.State.String()})
		if change.Error != "" {
			err := errors.New(change.Error)
			g.Log(grohl.Data{"at": "change", "status": "error", "err": err})
			c.Client.Resume()
			c.l.state.SetStatusFailed(c.job.ID, err)
			return err
		}
		switch change.State {
		case containerinit.StateInitial:
			g.Log(grohl.Data{"at": "wait_attach"})
			c.l.state.WaitAttach(c.job.ID)
			g.Log(grohl.Data{"at": "resume"})
			c.Client.Resume()
		case containerinit.StateRunning:
			g.Log(grohl.Data{"at": "running"})
			c.l.state.SetStatusRunning(c.job.ID)

			// if the job was stopped before it started, exit
			if c.l.state.GetJob(c.job.ID).ForceStop {
				c.Stop()
			}
		case containerinit.StateExited:
			g.Log(grohl.Data{"at": "exited", "status": change.ExitStatus})
			c.Client.Resume()
			c.l.state.SetStatusDone(c.job.ID, change.ExitStatus)
			return nil
		case containerinit.StateFailed:
			g.Log(grohl.Data{"at": "failed"})
			c.Client.Resume()
			c.l.state.SetStatusFailed(c.job.ID, errors.New("container failed to start"))
			return nil
		}
	}
	g.Log(grohl.Data{"at": "unknown_failure"})
	c.l.state.SetStatusFailed(c.job.ID, errors.New("unknown failure"))
	return nil
}
func (l *LibvirtLXCBackend) Run(job *host.Job, runConfig *RunConfig) (err error) {
	g := grohl.NewContext(grohl.Data{"backend": "libvirt-lxc", "fn": "run", "job.id": job.ID})
	g.Log(grohl.Data{"at": "start", "job.artifact.uri": job.Artifact.URI, "job.cmd": job.Config.Cmd})

	if !job.Config.HostNetwork {
		<-l.networkConfigured
	}
	if _, ok := job.Config.Env["DISCOVERD"]; !ok {
		<-l.discoverdConfigured
	}

	if runConfig == nil {
		runConfig = &RunConfig{}
	}
	container := &libvirtContainer{
		l:    l,
		job:  job,
		done: make(chan struct{}),
	}
	if !job.Config.HostNetwork {
		container.IP, err = l.ipalloc.RequestIP(l.bridgeNet, runConfig.IP)
		if err != nil {
			g.Log(grohl.Data{"at": "request_ip", "status": "error", "err": err})
			return err
		}
	}
	defer func() {
		if err != nil {
			go container.cleanup()
		}
	}()

	g.Log(grohl.Data{"at": "pull_image"})
	layers, err := l.pinkertonPull(job.Artifact.URI)
	if err != nil {
		g.Log(grohl.Data{"at": "pull_image", "status": "error", "err": err})
		return err
	}
	imageID, err := pinkerton.ImageID(job.Artifact.URI)
	if err == pinkerton.ErrNoImageID && len(layers) > 0 {
		imageID = layers[len(layers)-1].ID
	} else if err != nil {
		g.Log(grohl.Data{"at": "image_id", "status": "error", "err": err})
		return err
	}

	g.Log(grohl.Data{"at": "read_config"})
	imageConfig, err := readDockerImageConfig(imageID)
	if err != nil {
		g.Log(grohl.Data{"at": "read_config", "status": "error", "err": err})
		return err
	}

	g.Log(grohl.Data{"at": "checkout"})
	rootPath, err := l.pinkerton.Checkout(job.ID, imageID)
	if err != nil {
		g.Log(grohl.Data{"at": "checkout", "status": "error", "err": err})
		return err
	}
	container.RootPath = rootPath

	g.Log(grohl.Data{"at": "mount"})
	if err := bindMount(l.InitPath, filepath.Join(rootPath, ".containerinit"), false, true); err != nil {
		g.Log(grohl.Data{"at": "mount", "file": ".containerinit", "status": "error", "err": err})
		return err
	}
	if err := os.MkdirAll(filepath.Join(rootPath, "etc"), 0755); err != nil {
		g.Log(grohl.Data{"at": "mkdir", "dir": "etc", "status": "error", "err": err})
		return err
	}
	if err := bindMount(l.resolvConf, filepath.Join(rootPath, "etc/resolv.conf"), false, true); err != nil {
		g.Log(grohl.Data{"at": "mount", "file": "resolv.conf", "status": "error", "err": err})
		return err
	}
	if err := writeHostname(filepath.Join(rootPath, "etc/hosts"), job.ID); err != nil {
		g.Log(grohl.Data{"at": "write_hosts", "status": "error", "err": err})
		return err
	}
	if err := os.MkdirAll(filepath.Join(rootPath, ".container-shared"), 0700); err != nil {
		g.Log(grohl.Data{"at": "mkdir", "dir": ".container-shared", "status": "error", "err": err})
		return err
	}
	for i, m := range job.Config.Mounts {
		if err := os.MkdirAll(filepath.Join(rootPath, m.Location), 0755); err != nil {
			g.Log(grohl.Data{"at": "mkdir_mount", "dir": m.Location, "status": "error", "err": err})
			return err
		}
		if m.Target == "" {
			m.Target = filepath.Join(l.VolPath, cluster.RandomJobID(""))
			job.Config.Mounts[i].Target = m.Target
			if err := os.MkdirAll(m.Target, 0755); err != nil {
				g.Log(grohl.Data{"at": "mkdir_vol", "dir": m.Target, "status": "error", "err": err})
				return err
			}
		}
		if err := bindMount(m.Target, filepath.Join(rootPath, m.Location), m.Writeable, true); err != nil {
			g.Log(grohl.Data{"at": "mount", "target": m.Target, "location": m.Location, "status": "error", "err": err})
			return err
		}
	}

	// apply volumes
	for _, v := range job.Config.Volumes {
		vol := l.vman.GetVolume(v.VolumeID)
		if vol == nil {
			err := fmt.Errorf("job %s required volume %s, but that volume does not exist", job.ID, v.VolumeID)
			g.Log(grohl.Data{"at": "volume", "volumeID": v.VolumeID, "status": "error", "err": err})
			return err
		}
		if err := os.MkdirAll(filepath.Join(rootPath, v.Target), 0755); err != nil {
			g.Log(grohl.Data{"at": "volume_mkdir", "dir": v.Target, "status": "error", "err": err})
			return err
		}
		if err := bindMount(vol.Location(), filepath.Join(rootPath, v.Target), v.Writeable, true); err != nil {
			g.Log(grohl.Data{"at": "volume_mount", "target": v.Target, "volumeID": v.VolumeID, "status": "error", "err": err})
			return err
		}
	}

	if job.Config.Env == nil {
		job.Config.Env = make(map[string]string)
	}
	for i, p := range job.Config.Ports {
		if p.Proto != "tcp" && p.Proto != "udp" {
			return fmt.Errorf("unknown port proto %q", p.Proto)
		}

		if p.Port == 0 {
			job.Config.Ports[i].Port = 5000 + i
		}
		if i == 0 {
			job.Config.Env["PORT"] = strconv.Itoa(job.Config.Ports[i].Port)
		}
		job.Config.Env[fmt.Sprintf("PORT_%d", i)] = strconv.Itoa(job.Config.Ports[i].Port)
	}

	if !job.Config.HostNetwork {
		job.Config.Env["EXTERNAL_IP"] = container.IP.String()
	}

	config := &containerinit.Config{
		TTY:       job.Config.TTY,
		OpenStdin: job.Config.Stdin,
		WorkDir:   job.Config.WorkingDir,
		Resources: job.Resources,
	}
	if !job.Config.HostNetwork {
		config.IP = container.IP.String() + "/24"
		config.Gateway = l.bridgeAddr.String()
	}
	if config.WorkDir == "" {
		config.WorkDir = imageConfig.WorkingDir
	}
	if job.Config.Uid > 0 {
		config.User = strconv.Itoa(job.Config.Uid)
	} else if imageConfig.User != "" {
		// TODO: check and lookup user from image config
	}
	if len(job.Config.Entrypoint) > 0 {
		config.Args = job.Config.Entrypoint
		config.Args = append(config.Args, job.Config.Cmd...)
	} else {
		config.Args = imageConfig.Entrypoint
		if len(job.Config.Cmd) > 0 {
			config.Args = append(config.Args, job.Config.Cmd...)
		} else {
			config.Args = append(config.Args, imageConfig.Cmd...)
		}
	}
	for _, port := range job.Config.Ports {
		config.Ports = append(config.Ports, port)
	}

	g.Log(grohl.Data{"at": "write_config"})
	l.envMtx.RLock()
	err = writeContainerConfig(filepath.Join(rootPath, ".containerconfig"), config,
		map[string]string{
			"PATH": "/usr/local/sbin:/usr/local/bin:/usr/sbin:/usr/bin:/sbin:/bin",
			"TERM": "xterm",
			"HOME": "/",
		},
		l.defaultEnv,
		job.Config.Env,
		map[string]string{
			"HOSTNAME": job.ID,
		},
	)
	l.envMtx.RUnlock()
	if err != nil {
		g.Log(grohl.Data{"at": "write_config", "status": "error", "err": err})
		return err
	}

	l.state.AddJob(job, container.IP)

	domain := &lt.Domain{
		Type:   "lxc",
		Name:   job.ID,
		Memory: lt.UnitInt{Value: 1, Unit: "GiB"},
		OS: lt.OS{
			Type: lt.OSType{Value: "exe"},
			Init: "/.containerinit",
		},
		Devices: lt.Devices{
			Filesystems: []lt.Filesystem{{
				Type:   "mount",
				Source: lt.FSRef{Dir: rootPath},
				Target: lt.FSRef{Dir: "/"},
			}},
			Consoles: []lt.Console{{Type: "pty"}},
		},
		OnPoweroff: "preserve",
		OnCrash:    "preserve",
	}
	if spec, ok := job.Resources[resource.TypeMemory]; ok && spec.Limit != nil {
		domain.Memory = lt.UnitInt{Value: *spec.Limit, Unit: "bytes"}
	}

	if !job.Config.HostNetwork {
		domain.Devices.Interfaces = []lt.Interface{{
			Type:   "network",
			Source: lt.InterfaceSrc{Network: libvirtNetName},
		}}
	}

	// attempt to run libvirt commands multiple times in case the libvirt daemon is
	// temporarily unavailable (e.g. it has restarted, which sometimes happens in CI)
	g.Log(grohl.Data{"at": "define_domain"})
	var vd libvirt.VirDomain
	if err := l.withConnRetries(func() (err error) {
		vd, err = l.libvirt.DomainDefineXML(string(domain.XML()))
		return
	}); err != nil {
		g.Log(grohl.Data{"at": "define_domain", "status": "error", "err": err})
		return err
	}

	g.Log(grohl.Data{"at": "create_domain"})
	if err := l.withConnRetries(vd.Create); err != nil {
		g.Log(grohl.Data{"at": "create_domain", "status": "error", "err": err})
		return err
	}
	uuid, err := vd.GetUUIDString()
	if err != nil {
		g.Log(grohl.Data{"at": "get_domain_uuid", "status": "error", "err": err})
		return err
	}
	g.Log(grohl.Data{"at": "get_uuid", "uuid": uuid})
	l.state.SetContainerID(job.ID, uuid)

	domainXML, err := vd.GetXMLDesc(0)
	if err != nil {
		g.Log(grohl.Data{"at": "get_domain_xml", "status": "error", "err": err})
		return err
	}
	domain = &lt.Domain{}
	if err := xml.Unmarshal([]byte(domainXML), domain); err != nil {
		g.Log(grohl.Data{"at": "unmarshal_domain_xml", "status": "error", "err": err})
		return err
	}

	go container.watch(nil)

	g.Log(grohl.Data{"at": "finish"})
	return nil
}
func (c *libvirtContainer) watch(ready chan<- error) error {
	g := grohl.NewContext(grohl.Data{"backend": "libvirt-lxc", "fn": "watch_container", "job.id": c.job.ID})
	g.Log(grohl.Data{"at": "start"})

	defer func() {
		// TODO: kill containerinit/domain if it is still running
		c.l.containersMtx.Lock()
		delete(c.l.containers, c.job.ID)
		c.l.containersMtx.Unlock()
		c.cleanup()
		close(c.done)
	}()

	var symlinked bool
	var err error
	symlink := "/tmp/containerinit-rpc." + c.job.ID
	socketPath := path.Join(c.RootPath, containerinit.SocketPath)
	for startTime := time.Now(); time.Since(startTime) < 5*time.Second; time.Sleep(time.Millisecond) {
		if !symlinked {
			// We can't connect to the socket file directly because
			// the path to it is longer than 108 characters (UNIX_PATH_MAX).
			// Create a temporary symlink to connect to.
			if err = os.Symlink(socketPath, symlink); err != nil {
				g.Log(grohl.Data{"at": "symlink_socket", "status": "error", "err": err, "source": socketPath, "target": symlink})
				continue
			}
			defer os.Remove(symlink)
			symlinked = true
		}

		c.Client, err = containerinit.NewClient(symlink)
		if err == nil {
			break
		}
	}
	if ready != nil {
		ready <- err
	}
	if err != nil {
		g.Log(grohl.Data{"at": "connect", "status": "error", "err": err})
		return err
	}
	defer c.Client.Close()

	c.l.containersMtx.Lock()
	c.l.containers[c.job.ID] = c
	c.l.containersMtx.Unlock()

	if !c.job.Config.TTY {
		g.Log(grohl.Data{"at": "get_stdout"})
		stdout, stderr, err := c.Client.GetStdout()
		if err != nil {
			g.Log(grohl.Data{"at": "get_stdout", "status": "error", "err": err.Error()})
			return err
		}
		log := c.l.openLog(c.job.ID)
		defer log.Close()
		// TODO: log errors from these
		go log.ReadFrom(1, stdout)
		go log.ReadFrom(2, stderr)
	}

	g.Log(grohl.Data{"at": "watch_changes"})
	for change := range c.Client.StreamState() {
		g.Log(grohl.Data{"at": "change", "state": change.State.String()})
		if change.Error != "" {
			err := errors.New(change.Error)
			g.Log(grohl.Data{"at": "change", "status": "error", "err": err})
			c.l.state.SetStatusFailed(c.job.ID, err)
			return err
		}
		switch change.State {
		case containerinit.StateInitial:
			g.Log(grohl.Data{"at": "wait_attach"})
			c.l.state.WaitAttach(c.job.ID)
			g.Log(grohl.Data{"at": "resume"})
			c.Client.Resume()
		case containerinit.StateRunning:
			g.Log(grohl.Data{"at": "running"})
			c.l.state.SetStatusRunning(c.job.ID)
		case containerinit.StateExited:
			g.Log(grohl.Data{"at": "exited", "status": change.ExitStatus})
			c.Client.Resume()
			c.l.state.SetStatusDone(c.job.ID, change.ExitStatus)
			return nil
		case containerinit.StateFailed:
			g.Log(grohl.Data{"at": "failed"})
			c.Client.Resume()
			c.l.state.SetStatusFailed(c.job.ID, errors.New("container failed to start"))
			return nil
		}
	}
	g.Log(grohl.Data{"at": "unknown_failure"})
	c.l.state.SetStatusFailed(c.job.ID, errors.New("unknown failure"))
	return nil
}
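// Illustrative sketch (not part of the original source): both watch
// implementations above work around UNIX_PATH_MAX (108 bytes) by dialing the
// containerinit socket through a short temporary symlink. dialLongUnixSocket is
// a hypothetical helper showing the idea with only the standard "net" and "os"
// packages.
func dialLongUnixSocket(socketPath, shortLink string) (net.Conn, error) {
	if err := os.Symlink(socketPath, shortLink); err != nil && !os.IsExist(err) {
		return nil, err
	}
	defer os.Remove(shortLink) // the symlink is only needed while dialing
	return net.Dial("unix", shortLink)
}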
func (c *context) watchHost(id string, events chan<- *host.Event) {
	if !c.hosts.Add(id) {
		return
	}
	defer c.hosts.Remove(id)

	g := grohl.NewContext(grohl.Data{"fn": "watchHost", "host.id": id})

	h, err := c.DialHost(id)
	if err != nil {
		// TODO: log/handle error
	}
	c.hosts.Set(id, h)

	g.Log(grohl.Data{"at": "start"})

	ch := make(chan *host.Event)
	h.StreamEvents("all", ch)
	// Nil event to mark the start of watching a host
	if events != nil {
		events <- nil
	}
	for event := range ch {
		job := c.jobs.Get(id, event.JobID)
		if job == nil {
			continue
		}

		j := &ct.Job{ID: id + "-" + event.JobID, AppID: job.Formation.AppID, ReleaseID: job.Formation.Release.ID, Type: job.Type}
		switch event.Event {
		case "create":
			j.State = "starting"
		case "start":
			j.State = "up"
			job.startedAt = event.Job.StartedAt
		case "stop":
			j.State = "down"
		case "error":
			j.State = "crashed"
		}
		if err = c.PutJob(j); err != nil {
			// TODO: log/handle error
		}

		if event.Event != "error" && event.Event != "stop" {
			if events != nil {
				events <- event
			}
			continue
		}
		g.Log(grohl.Data{"at": "remove", "job.id": event.JobID, "event": event.Event})

		c.jobs.Remove(id, event.JobID)
		go func(event *host.Event) {
			c.mtx.RLock()
			job.Formation.RestartJob(job.Type, id, event.JobID)
			c.mtx.RUnlock()
			if events != nil {
				events <- event
			}
		}(event)
	}
	// TODO: check error/reconnect
}
func (c *context) watchHost(h *cluster.Host, ready chan struct{}) {
	if !c.hosts.Add(h.ID()) {
		if ready != nil {
			ready <- struct{}{}
		}
		return
	}
	defer c.hosts.Remove(h.ID())

	g := grohl.NewContext(grohl.Data{"fn": "watchHost", "host.id": h.ID()})

	c.hosts.Set(h.ID(), h)

	g.Log(grohl.Data{"at": "start"})

	ch := make(chan *host.Event)
	h.StreamEvents("all", ch)
	if ready != nil {
		ready <- struct{}{}
	}

	// Call PutJob in a goroutine so we don't block receiving job events whilst potentially
	// making multiple requests to the controller (e.g. if the controller is down).
	//
	// Use a channel (rather than spawning a goroutine per event) so that events are delivered in order.
	jobs := make(chan *ct.Job, 10)
	go func() {
		for job := range jobs {
			putJobAttempts.Run(func() error {
				if err := c.PutJob(job); err != nil {
					g.Log(grohl.Data{"at": "put_job_error", "job.id": job.ID, "state": job.State, "err": err})
					// ignore validation / not found errors
					if httphelper.IsValidationError(err) || err == controller.ErrNotFound {
						return nil
					}
					return err
				}
				g.Log(grohl.Data{"at": "put_job", "job.id": job.ID, "state": job.State})
				return nil
			})
		}
	}()

	for event := range ch {
		meta := event.Job.Job.Metadata
		appID := meta["flynn-controller.app"]
		releaseID := meta["flynn-controller.release"]
		jobType := meta["flynn-controller.type"]

		if appID == "" || releaseID == "" {
			continue
		}

		job := &ct.Job{
			ID:        event.JobID,
			AppID:     appID,
			ReleaseID: releaseID,
			Type:      jobType,
			State:     jobState(event),
			Meta:      jobMetaFromMetadata(meta),
		}
		g.Log(grohl.Data{"at": "event", "job.id": event.JobID, "event": event.Event})
		jobs <- job

		// get a read lock on the mutex to ensure we are not currently
		// syncing with the cluster
		c.mtx.RLock()
		j := c.jobs.Get(h.ID(), event.JobID)
		c.mtx.RUnlock()
		if j == nil {
			continue
		}
		j.startedAt = event.Job.StartedAt

		if event.Event != "error" && event.Event != "stop" {
			continue
		}
		g.Log(grohl.Data{"at": "remove", "job.id": event.JobID, "event": event.Event})

		c.jobs.Remove(h.ID(), event.JobID)
		go func(event *host.Event) {
			c.mtx.RLock()
			j.Formation.RestartJob(jobType, h.ID(), event.JobID)
			c.mtx.RUnlock()
		}(event)
	}
	// TODO: check error/reconnect
}
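// Illustrative sketch (not part of the original source): watchHost above
// forwards job updates through one buffered channel consumed by a single
// goroutine, so the controller sees updates in arrival order while the event
// loop never blocks on slow controller requests. orderedPutter is a hypothetical
// skeleton of that pattern, with put standing in for c.PutJob.
func orderedPutter(put func(job string) error) chan<- string {
	jobs := make(chan string, 10)
	go func() {
		for job := range jobs {
			// a single consumer preserves ordering; retry logic would go here
			put(job)
		}
	}()
	return jobs
}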
func (f *Formation) rectify() {
	g := grohl.NewContext(grohl.Data{"fn": "rectify", "app.id": f.AppID, "release.id": f.Release.ID})

	var hosts map[string]host.Host
	if _, ok := f.c.omni[f]; ok {
		var err error
		hosts, err = f.c.ListHosts()
		if err != nil {
			return
		}
		if len(hosts) == 0 {
			// TODO: log/handle error
		}
	}

	// update job counts
	for t, expected := range f.Processes {
		if f.Release.Processes[t].Omni {
			// get job counts per host
			hostCounts := make(map[string]int, len(hosts))
			for _, h := range hosts {
				hostCounts[h.ID] = 0
				for _, job := range h.Jobs {
					if f.jobType(job) != t {
						continue
					}
					hostCounts[h.ID]++
				}
			}

			// update per host
			for hostID, actual := range hostCounts {
				diff := expected - actual
				g.Log(grohl.Data{"at": "update", "type": t, "expected": expected, "actual": actual, "diff": diff})
				if diff > 0 {
					f.add(diff, t, hostID)
				} else if diff < 0 {
					f.remove(-diff, t, hostID)
				}
			}
		} else {
			actual := len(f.jobs[t])
			diff := expected - actual
			g.Log(grohl.Data{"at": "update", "type": t, "expected": expected, "actual": actual, "diff": diff})
			if diff > 0 {
				f.add(diff, t, "")
			} else if diff < 0 {
				f.remove(-diff, t, "")
			}
		}
	}

	// remove process types
	for t, jobs := range f.jobs {
		// ignore one-off jobs which have no type
		if t == "" {
			continue
		}
		if _, exists := f.Processes[t]; !exists {
			g.Log(grohl.Data{"at": "cleanup", "type": t, "count": len(jobs)})
			f.remove(len(jobs), t, "")
		}
	}
}
func (c *context) syncCluster() {
	g := grohl.NewContext(grohl.Data{"fn": "syncCluster"})

	artifacts := make(map[string]*ct.Artifact)
	releases := make(map[string]*ct.Release)
	rectify := make(map[*Formation]struct{})

	hosts, err := c.Hosts()
	if err != nil {
		// TODO: log/handle error
	}

	c.mtx.Lock()
	for _, h := range hosts {
		jobs, err := h.ListJobs()
		if err != nil {
			// TODO: log/handle error
			continue
		}
		for _, j := range jobs {
			if j.Status != host.StatusStarting && j.Status != host.StatusRunning {
				continue
			}
			job := j.Job
			appID := job.Metadata["flynn-controller.app"]
			appName := job.Metadata["flynn-controller.app_name"]
			releaseID := job.Metadata["flynn-controller.release"]
			jobType := job.Metadata["flynn-controller.type"]
			gg := g.New(grohl.Data{"host.id": h.ID(), "job.id": job.ID, "app.id": appID, "release.id": releaseID, "type": jobType})

			if appID == "" || releaseID == "" {
				continue
			}
			if job := c.jobs.Get(h.ID(), job.ID); job != nil {
				continue
			}

			f := c.formations.Get(appID, releaseID)
			if f == nil {
				release := releases[releaseID]
				if release == nil {
					release, err = c.GetRelease(releaseID)
					if err != nil {
						gg.Log(grohl.Data{"at": "getRelease", "status": "error", "err": err})
						continue
					}
					releases[release.ID] = release
				}

				artifact := artifacts[release.ArtifactID]
				if artifact == nil {
					artifact, err = c.GetArtifact(release.ArtifactID)
					if err != nil {
						gg.Log(grohl.Data{"at": "getArtifact", "status": "error", "err": err})
						continue
					}
					artifacts[artifact.ID] = artifact
				}

				formation, err := c.GetFormation(appID, releaseID)
				if err != nil {
					gg.Log(grohl.Data{"at": "getFormation", "status": "error", "err": err})
					continue
				}

				f = NewFormation(c, &ct.ExpandedFormation{
					App:       &ct.App{ID: appID, Name: appName},
					Release:   release,
					Artifact:  artifact,
					Processes: formation.Processes,
				})
				gg.Log(grohl.Data{"at": "addFormation"})
				f = c.formations.Add(f)
			}

			gg.Log(grohl.Data{"at": "addJob"})
			go c.PutJob(&ct.Job{
				ID:        job.ID,
				AppID:     appID,
				ReleaseID: releaseID,
				Type:      jobType,
				State:     "up",
				Meta:      jobMetaFromMetadata(job.Metadata),
			})
			j := f.jobs.Add(jobType, h.ID(), job.ID)
			j.Formation = f
			c.jobs.Add(j)
			rectify[f] = struct{}{}
		}
	}

	if err := c.syncJobStates(); err != nil {
		// TODO: handle error
	}

	c.mtx.Unlock()

	for f := range rectify {
		go f.Rectify()
	}
}
func (c *context) syncCluster(events chan<- *host.Event) {
	g := grohl.NewContext(grohl.Data{"fn": "syncCluster"})

	artifacts := make(map[string]*ct.Artifact)
	releases := make(map[string]*ct.Release)
	rectify := make(map[*Formation]struct{})

	go c.watchHosts(events)

	hosts, err := c.ListHosts()
	if err != nil {
		// TODO: log/handle error
	}

	c.mtx.Lock()
	for _, h := range hosts {
		for _, job := range h.Jobs {
			appID := job.Metadata["flynn-controller.app"]
			releaseID := job.Metadata["flynn-controller.release"]
			jobType := job.Metadata["flynn-controller.type"]
			gg := g.New(grohl.Data{"host.id": h.ID, "job.id": job.ID, "app.id": appID, "release.id": releaseID, "type": jobType})

			if appID == "" || releaseID == "" {
				continue
			}
			if job := c.jobs.Get(h.ID, job.ID); job != nil {
				continue
			}

			f := c.formations.Get(appID, releaseID)
			if f == nil {
				release := releases[releaseID]
				if release == nil {
					release, err = c.GetRelease(releaseID)
					if err != nil {
						gg.Log(grohl.Data{"at": "getRelease", "status": "error", "err": err})
						continue
					}
					releases[release.ID] = release
				}

				artifact := artifacts[release.ArtifactID]
				if artifact == nil {
					artifact, err = c.GetArtifact(release.ArtifactID)
					if err != nil {
						gg.Log(grohl.Data{"at": "getArtifact", "status": "error", "err": err})
						continue
					}
					artifacts[artifact.ID] = artifact
				}

				formation, err := c.GetFormation(appID, releaseID)
				if err != nil {
					gg.Log(grohl.Data{"at": "getFormation", "status": "error", "err": err})
					continue
				}

				f = NewFormation(c, &ct.ExpandedFormation{
					App:       &ct.App{ID: appID},
					Release:   release,
					Artifact:  artifact,
					Processes: formation.Processes,
				})
				gg.Log(grohl.Data{"at": "addFormation"})
				f = c.formations.Add(f)
			}

			gg.Log(grohl.Data{"at": "addJob"})
			j := f.jobs.Add(jobType, h.ID, job.ID)
			j.Formation = f
			c.jobs.Add(j)
			rectify[f] = struct{}{}
		}
	}
	c.mtx.Unlock()

	for f := range rectify {
		go f.Rectify()
	}
}