// createMember provisions and boots a single systemd-nspawn container to act
// as cluster member `id`. It builds a minimal CoreOS-like root filesystem
// under $TMPDIR/<cluster>/<member-id>/fs, installs the fleetd/fleetctl
// binaries and a config drive, starts the container as a transient systemd
// unit, then blocks until the member's fleet API port accepts a TCP
// connection (or a 10s deadline passes). On success it returns the new
// Member; on any failure it logs the step that failed and returns the error
// via the named return value.
func (nc *nspawnCluster) createMember(id string) (m Member, err error) {
	// The member's IP is derived directly from its numeric id on the
	// fleet0 bridge network (172.18.1.<id>).
	nm := nspawnMember{
		uuid: util.NewMachineID(),
		id:   id,
		ip:   fmt.Sprintf("172.18.1.%s", id),
	}
	// NOTE(review): nm is stored in the map by value before nm.pid is set
	// below, so the map copy never sees the PID — confirm callers only use
	// the returned pointer for PID-dependent operations.
	nc.members[nm.ID()] = nm

	basedir := path.Join(os.TempDir(), nc.name)
	fsdir := path.Join(basedir, nm.ID(), "fs")

	// Shell commands that assemble the container rootfs. Order matters:
	// directories must exist before files/symlinks are placed inside them.
	cmds := []string{
		// set up directory for fleet service
		fmt.Sprintf("mkdir -p %s/etc/systemd/system", fsdir),

		// minimum requirements for running systemd/coreos in a container.
		// NOTE: copying /etc/pam.d is necessary only for such setups with
		// sys-auth/pambase installed, for example developer image of CoreOS
		// 1053.2.0. It should be fine also for systems where /etc/pam.d is
		// empty, because then it should automatically fall back to
		// /usr/lib64/pam.d, which belongs to sys-libs/pam.
		fmt.Sprintf("mkdir -p %s/usr", fsdir),
		fmt.Sprintf("cp /etc/os-release %s/etc", fsdir),
		fmt.Sprintf("cp -a /etc/pam.d %s/etc", fsdir),
		fmt.Sprintf("ln -s /proc/self/mounts %s/etc/mtab", fsdir),
		fmt.Sprintf("ln -s usr/lib64 %s/lib64", fsdir),
		fmt.Sprintf("ln -s lib64 %s/lib", fsdir),
		fmt.Sprintf("ln -s usr/bin %s/bin", fsdir),
		fmt.Sprintf("ln -s usr/sbin %s/sbin", fsdir),
		fmt.Sprintf("mkdir -p %s/home/core/.ssh", fsdir),
		fmt.Sprintf("install -d -o root -g systemd-journal -m 2755 %s/var/log/journal", fsdir),
		// uid/gid 500 matches the "core" user created in /etc/passwd below.
		fmt.Sprintf("chown -R 500:500 %s/home/core", fsdir),
		// We don't need this, and it's slow, so mask it
		fmt.Sprintf("ln -s /dev/null %s/etc/systemd/system/systemd-udev-hwdb-update.service", fsdir),
		// set up directory for sshd_config (see below)
		fmt.Sprintf("mkdir -p %s/etc/ssh", fsdir),
	}

	for _, cmd := range cmds {
		var stderr, stdout string
		stdout, stderr, err = run(cmd)
		if err != nil {
			log.Printf("Command '%s' failed:\nstdout:: %s\nstderr: %s\nerr: %v", cmd, stdout, stderr, err)
			return
		}
	}

	// Static files written into the rootfs: sshd config, a minimal host-key
	// generator unit, the "core" user, and the machine ID that fleet uses
	// to identify this member.
	filesContents := []struct {
		path     string
		contents string
		mode     os.FileMode
	}{
		{
			"/etc/ssh/sshd_config",
			`# Use most defaults for sshd configuration.
UsePrivilegeSeparation sandbox
Subsystem sftp internal-sftp
UseDNS no
`,
			0644,
		},
		// For expediency, generate the minimal viable SSH keys for the host, instead of the default set
		{
			"/etc/systemd/system/sshd-keygen.service",
			`[Unit]
Description=Generate sshd host keys
Before=sshd.service

[Service]
Type=oneshot
RemainAfterExit=yes
ExecStart=/usr/bin/ssh-keygen -t rsa -f /etc/ssh/ssh_host_rsa_key -N "" -b 1024`,
			0644,
		},
		{
			"/etc/passwd",
			"core:x:500:500:CoreOS Admin:/home/core:/bin/bash",
			0644,
		},
		{
			"/etc/group",
			"core:x:500:",
			0644,
		},
		{
			"/etc/machine-id",
			nm.ID(),
			0644,
		},
		{
			"/home/core/.bash_profile",
			"export PATH=/opt/fleet:$PATH",
			0644,
		},
	}
	for _, file := range filesContents {
		if err = ioutil.WriteFile(path.Join(fsdir, file.path), []byte(file.contents), file.mode); err != nil {
			log.Printf("Failed writing %s: %v", path.Join(fsdir, file.path), err)
			return
		}
	}

	// Copy the fleet binaries into the rootfs and generate the cloud-config
	// drive that points the member at its IP.
	if err = nc.insertBin(fleetdBinPath, fsdir); err != nil {
		log.Printf("Failed preparing fleetd in filesystem: %v", err)
		return
	}

	if err = nc.insertBin(fleetctlBinPath, fsdir); err != nil {
		log.Printf("Failed preparing fleetctl in filesystem: %v", err)
		return
	}

	if err = nc.buildConfigDrive(fsdir, nm.IP()); err != nil {
		log.Printf("Failed building config drive: %v", err)
		return
	}

	// Assemble the systemd-nspawn invocation: boot (-b) the prepared rootfs
	// (-D) as machine <cluster><id>, bridged onto fleet0.
	exec := strings.Join([]string{
		"/usr/bin/systemd-nspawn",
		"--bind-ro=/usr",
		"-b",
		"--uuid=" + nm.uuid,
		fmt.Sprintf("-M %s%s", nc.name, nm.ID()),
		"--capability=CAP_NET_BIND_SERVICE,CAP_SYS_TIME", // needed for ntpd
		"--network-bridge fleet0",
		fmt.Sprintf("-D %s", fsdir),
	}, " ")
	log.Printf("Creating nspawn container: %s", exec)
	err = nc.systemd(fmt.Sprintf("%s%s.service", nc.name, nm.ID()), exec)
	if err != nil {
		log.Printf("Failed creating nspawn container: %v", err)
		return
	}

	nm.pid, err = nc.machinePID(nm.ID())
	if err != nil {
		log.Printf("Failed detecting machine %s%s PID: %v", nc.name, nm.ID(), err)
		return
	}

	// Poll the member's fleet API port until it accepts a connection,
	// giving up after 10 seconds.
	alarm := time.After(10 * time.Second)
	addr := fmt.Sprintf("%s:%d", nm.IP(), fleetAPIPort)
	for {
		select {
		case <-alarm:
			err = fmt.Errorf("Timed out waiting for machine to start")
			log.Printf("Starting %s%s failed: %v", nc.name, nm.ID(), err)
			return
		default:
		}
		log.Printf("Dialing machine: %s", addr)
		// := deliberately shadows the named return err; a dial failure
		// here just retries and must not leak into the function result.
		c, err := net.DialTimeout("tcp", addr, 100*time.Millisecond)
		if err == nil {
			c.Close()
			break
		}
		time.Sleep(100 * time.Millisecond)
	}

	return Member(&nm), nil
}
// TestDetectMachineId checks for etcd registration failing on a duplicated // machine-id on different machines. // First it creates a cluster with 2 members, m0 and m1. Then make their // machine IDs the same as each other, by explicitly setting the m1's ID to // the same as m0's. Test succeeds when an error returns, while test fails // when nothing happens. func TestDetectMachineId(t *testing.T) { cluster, err := platform.NewNspawnCluster("smoke") if err != nil { t.Fatal(err) } defer cluster.Destroy(t) members, err := platform.CreateNClusterMembers(cluster, 2) if err != nil { t.Fatal(err) } m0 := members[0] m1 := members[1] _, err = cluster.WaitForNMachines(m0, 2) if err != nil { t.Fatal(err) } machineIdFile := "/etc/machine-id" // Restart fleet service, and check if its systemd status is still active. restartFleetService := func(m platform.Member) error { stdout, err := cluster.MemberCommand(m, "sudo", "systemctl", "restart", "fleet.service") if err != nil { return fmt.Errorf("Failed to restart fleet service\nstdout: %s\nerr: %v", stdout, err) } stdout, _ = cluster.MemberCommand(m, "systemctl", "show", "--property=ActiveState", "fleet") if strings.TrimSpace(stdout) != "ActiveState=active" { return fmt.Errorf("Fleet unit not reported as active: %s", stdout) } stdout, _ = cluster.MemberCommand(m, "systemctl", "show", "--property=Result", "fleet") if strings.TrimSpace(stdout) != "Result=success" { return fmt.Errorf("Result for fleet unit not reported as success: %s", stdout) } return nil } stdout, err := cluster.MemberCommand(m0, "cat", machineIdFile) if err != nil { t.Fatalf("Failed to get machine-id\nstdout: %s\nerr: %v", stdout, err) } m0_machine_id := strings.TrimSpace(stdout) // If the two machine IDs are different with each other, // set the m1's ID to the same one as m0, to intentionally // trigger an error case of duplication of machine ID. 
stdout, err = cluster.MemberCommand(m1, "echo", m0_machine_id, "|", "sudo", "tee", machineIdFile) if err != nil { t.Fatalf("Failed to replace machine-id\nstdout: %s\nerr: %v", stdout, err) } if err := restartFleetService(m1); err != nil { t.Fatal(err) } // fleetd should actually be running, but failing to list machines. // So we should expect a specific error after running fleetctl list-machines, // like "googlapi: Error 503: fleet server unable to communicate with etcd". stdout, stderr, err := cluster.Fleetctl(m1, "list-machines", "--no-legend") if err != nil { if !strings.Contains(err.Error(), "exit status 1") || !strings.Contains(stderr, "fleet server unable to communicate with etcd") { t.Fatalf("m1: Failed to get list of machines. err: %v\nstderr: %s", err, stderr) } // If both conditions are satisfied, "exit status 1" and // "...unable to communicate...", then it's an expected error. PASS. } else { t.Fatalf("m1: should get an error, but got success.\nstderr: %s", stderr) } // Trigger another test case of m0's ID getting different from m1's. // Then it's expected that m0 and m1 would be working properly with distinct // machine IDs, after having restarted fleet.service both on m0 and m1. stdout, err = cluster.MemberCommand(m0, "echo", util.NewMachineID(), "|", "sudo", "tee", machineIdFile) if err != nil { t.Fatalf("m0: Failed to replace machine-id\nstdout: %s\nerr: %v", stdout, err) } // Restart fleet service on m0, and see that it's still working. if err := restartFleetService(m0); err != nil { t.Fatal(err) } stdout, stderr, err = cluster.Fleetctl(m0, "list-machines", "--no-legend") if err != nil { t.Fatalf("m0: error: %v\nstdout: %s\nstderr: %s", err, stdout, stderr) } }