Example #1
0
// DumpLogs collects diagnostic output from the cluster's instances and writes
// it to files created through buildLog.
//
// For every instance in c.Instances it records a process listing and several
// host log files. Then, for the default and release instance groups (when
// non-empty), it records the job list and per-job inspect/log output, falling
// back to tailing the files under /var/log/flynn on every instance of the
// group if the per-job collection fails.
//
// Collection is best-effort: nothing is returned to the caller.
func (c *Cluster) DumpLogs(buildLog *buildlog.Log) {
	// run executes cmds in order on inst, sending both stdout and stderr of
	// each command to the build log file named log. Each command is preceded
	// by a header line naming the host and the command. It stops at, and
	// returns, the first error.
	run := func(log string, inst *Instance, cmds ...string) error {
		out, err := buildLog.NewFileWithTimeout(log, 60*time.Second)
		if err != nil {
			return err
		}
		for _, cmd := range cmds {
			fmt.Fprintln(out, "HostID:", inst.ID, "-", cmd)
			fmt.Fprintln(out)
			runErr := inst.Run(cmd, &Streams{Stdout: out, Stderr: out})
			// The trailing blank line is written even when the command fails.
			fmt.Fprintln(out)
			if runErr != nil {
				return runErr
			}
		}
		return nil
	}

	// Host-level logs. Errors are deliberately ignored so that one failing
	// host does not prevent the remaining hosts from being dumped.
	for _, inst := range c.Instances {
		run(
			fmt.Sprintf("host-logs-%s.log", inst.ID),
			inst,
			"ps faux",
			"cat /tmp/flynn-host.log",
			"cat /tmp/debug-info.log",
			"sudo cat /var/log/libvirt/libvirtd.log",
		)
	}

	// printLogs dumps the job logs of one instance group; typ is used as the
	// file name prefix. Callers must pass a non-empty instances slice because
	// instances[0] is used to run the flynn-host commands.
	printLogs := func(typ string, instances []*Instance) {
		// fallback tails the log files under /var/log/flynn on every
		// instance of the group (best-effort, errors ignored).
		fallback := func() {
			for _, inst := range instances {
				run(fmt.Sprintf("%s-fallback-%s.log", typ, inst.ID), inst, "sudo bash -c 'tail -n +1 /var/log/flynn/**/*.log'")
			}
		}

		run(fmt.Sprintf("%s-jobs.log", typ), instances[0], "flynn-host ps -a")

		// List the jobs, one per line, formatted by the template below as
		// <app_name>-<type>-<HostID>-<Job.ID>.
		var out bytes.Buffer
		cmd := `flynn-host ps -aqf '{{ metadata "flynn-controller.app_name" }}-{{ metadata "flynn-controller.type" }}-{{ .HostID }}-{{ .Job.ID }}'`
		if err := instances[0].Run(cmd, &Streams{Stdout: &out, Stderr: &out}); err != nil {
			fallback()
			return
		}

		jobs := strings.Split(strings.TrimSpace(out.String()), "\n")
		for _, job := range jobs {
			// fields[2] is everything after the second "-".
			// NOTE(review): this is the <HostID>-<Job.ID> part only if the
			// app name and type contain no "-" themselves; confirm.
			fields := strings.SplitN(job, "-", 3)
			if len(fields) < 3 {
				// Empty output (no jobs listed) or a line that does not
				// match the template: indexing fields[2] would panic, so
				// treat it like any other failure and use the fallback.
				fallback()
				return
			}
			cmds := []string{
				fmt.Sprintf("flynn-host inspect %s", fields[2]),
				fmt.Sprintf("flynn-host log --init %s", fields[2]),
			}
			if err := run(fmt.Sprintf("%s-%s.log", typ, job), instances[0], cmds...); err != nil {
				fallback()
				return
			}
		}
	}
	if len(c.defaultInstances) > 0 {
		printLogs("default", c.defaultInstances)
	}
	if len(c.releaseInstances) > 0 {
		printLogs("release", c.releaseInstances)
	}
}
Example #2
0
// DumpLogs collects diagnostic output from the cluster's instances and writes
// it to files created through buildLog.
//
// For every instance in c.Instances it records a process listing and host log
// files. Then, for the default and release instance groups (when non-empty),
// it records the job list and per-job inspect/log output. If no job's output
// could be collected it falls back to tailing /var/log/flynn/*.log on every
// instance of the group, and it always runs that fallback on instances whose
// flynn-host process check fails.
//
// Collection is best-effort: nothing is returned to the caller.
func (c *Cluster) DumpLogs(buildLog *buildlog.Log) {
	// run executes cmds in order on inst, sending both stdout and stderr of
	// each command to the build log file named log. Each command is preceded
	// by a header line naming the host and the command. It stops at, and
	// returns, the first error.
	run := func(log string, inst *Instance, cmds ...string) error {
		out, err := buildLog.NewFileWithTimeout(log, 60*time.Second)
		if err != nil {
			return err
		}
		for _, cmd := range cmds {
			fmt.Fprintln(out, "HostID:", inst.ID, "-", cmd)
			fmt.Fprintln(out)
			runErr := inst.Run(cmd, &Streams{Stdout: out, Stderr: out})
			// The trailing blank line is written even when the command fails.
			fmt.Fprintln(out)
			if runErr != nil {
				return runErr
			}
		}
		return nil
	}

	// Host-level logs. Errors are deliberately ignored so that one failing
	// host does not prevent the remaining hosts from being dumped.
	for _, inst := range c.Instances {
		run(
			fmt.Sprintf("host-logs-%s.log", inst.ID),
			inst,
			"ps faux",
			"cat /var/log/flynn/flynn-host.log",
			"cat /tmp/debug-info.log",
		)
	}

	// printLogs dumps the job logs of one instance group; typ is used as the
	// file name prefix. Callers must pass a non-empty instances slice because
	// instances[0] is used to run the flynn-host commands.
	printLogs := func(typ string, instances []*Instance) {
		// fallback tails /var/log/flynn/*.log on each of the given
		// instances (best-effort, errors ignored).
		fallback := func(instances []*Instance) {
			for _, inst := range instances {
				run(fmt.Sprintf("%s-fallback-%s.log", typ, inst.ID), inst, "sudo bash -c 'tail -n +1 /var/log/flynn/*.log'")
			}
		}

		run(fmt.Sprintf("%s-jobs.log", typ), instances[0], "flynn-host ps -a")

		// List the jobs, one per line, formatted by the template below as
		// <app_name>:<type>:<Job.ID>.
		var out bytes.Buffer
		cmd := `flynn-host ps -aqf '{{ metadata "flynn-controller.app_name" }}:{{ metadata "flynn-controller.type" }}:{{ .Job.ID }}'`
		if err := instances[0].Run(cmd, &Streams{Stdout: &out, Stderr: &out}); err != nil {
			fallback(instances)
			return
		}

		// only fallback if all `flynn-host log` commands fail
		shouldFallback := true
		jobs := strings.Split(strings.TrimSpace(out.String()), "\n")
		for _, job := range jobs {
			fields := strings.Split(job, ":")
			if len(fields) < 3 {
				// Empty output (no jobs listed) or a line that does not
				// match the template: indexing fields[2] would panic.
				// Skip it; shouldFallback stays true unless some other
				// job is dumped successfully.
				continue
			}
			jobID := fields[2]
			cmds := []string{
				fmt.Sprintf("timeout 10s flynn-host inspect --redact-env BACKEND_S3 %s", jobID),
				fmt.Sprintf("timeout 10s flynn-host log --init %s", jobID),
			}
			if err := run(fmt.Sprintf("%s-%s.log", typ, job), instances[0], cmds...); err != nil {
				continue
			}
			shouldFallback = false
		}
		if shouldFallback {
			fallback(instances)
		}

		// run the fallback on any stopped instances as their logs will
		// not appear in `flynn-host ps`
		stoppedInstances := make([]*Instance, 0, len(instances))
		for _, inst := range instances {
			// `kill -0` sends no signal; it only checks that the process
			// named in the pid file exists. Any error counts as stopped.
			if err := inst.Run("sudo kill -0 $(cat /var/run/flynn-host.pid)", nil); err != nil {
				stoppedInstances = append(stoppedInstances, inst)
			}
		}
		if len(stoppedInstances) > 0 {
			fallback(stoppedInstances)
		}
	}
	if len(c.defaultInstances) > 0 {
		printLogs("default", c.defaultInstances)
	}
	if len(c.releaseInstances) > 0 {
		printLogs("release", c.releaseInstances)
	}
}