// Start 5 services that conflict with one another. Assert that only
// 3 of the 5 are started.
func TestScheduleGlobalConflicts(t *testing.T) {
	cluster, err := platform.NewNspawnCluster("smoke")
	if err != nil {
		t.Fatal(err)
	}
	defer cluster.Destroy()

	// Start with a simple three-node cluster
	if err := platform.CreateNClusterMembers(cluster, 3, platform.MachineConfig{}); err != nil {
		t.Fatal(err)
	}
	machines, err := cluster.WaitForNMachines(3)
	if err != nil {
		t.Fatal(err)
	}

	// Ensure we can SSH into each machine using fleetctl
	for _, machine := range machines {
		if _, _, err := cluster.Fleetctl("--strict-host-key-checking=false", "ssh", machine, "uptime"); err != nil {
			t.Errorf("Unable to SSH into fleet machine: %v", err)
		}
	}

	for i := 0; i < 5; i++ {
		unit := fmt.Sprintf("fixtures/units/conflict.%d.service", i)
		_, _, err := cluster.Fleetctl("start", "--no-block", unit)
		if err != nil {
			t.Errorf("Failed starting unit %s: %v", unit, err)
		}
	}

	// All 5 services should be visible immediately and 3 should become
	// ACTIVE shortly thereafter
	stdout, _, err := cluster.Fleetctl("list-units", "--no-legend")
	if err != nil {
		t.Fatalf("Failed to run list-units: %v", err)
	}
	units := strings.Split(strings.TrimSpace(stdout), "\n")
	if len(units) != 5 {
		t.Fatalf("Did not find five units in cluster: \n%s", stdout)
	}

	states, err := cluster.WaitForNActiveUnits(3)
	if err != nil {
		t.Fatal(err)
	}

	machineSet := make(map[string]bool)
	for unit, unitState := range states {
		if len(unitState.Machine) == 0 {
			t.Errorf("Unit %s is not reporting machine", unit)
		}
		machineSet[unitState.Machine] = true
	}

	if len(machineSet) != 3 {
		t.Errorf("3 active units not running on 3 unique machines")
	}
}
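// The conflict.N.service fixtures above are assumed to exclude one another via
// fleet's Conflicts option. A minimal sketch of what such a fixture might look
// like (hypothetical contents, not copied from fixtures/units/):
//
//	[Service]
//	ExecStart=/bin/bash -c "while true; do echo conflict; sleep 1; done"
//
//	[X-Fleet]
//	Conflicts=conflict.*.service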
// TestUnitRunnable is the simplest test possible, deploying a single-node
// cluster and ensuring a unit can enter an 'active' state
func TestUnitRunnable(t *testing.T) {
	cluster, err := platform.NewNspawnCluster("smoke")
	if err != nil {
		t.Fatal(err)
	}
	defer cluster.Destroy()

	if err := platform.CreateNClusterMembers(cluster, 1, platform.MachineConfig{}); err != nil {
		t.Fatal(err)
	}
	_, err = cluster.WaitForNMachines(1)
	if err != nil {
		t.Fatal(err)
	}

	if stdout, stderr, err := cluster.Fleetctl("start", "fixtures/units/hello.service"); err != nil {
		t.Fatalf("Unable to start fleet unit: \nstdout: %s\nstderr: %s\nerr: %v", stdout, stderr, err)
	}

	units, err := cluster.WaitForNActiveUnits(1)
	if err != nil {
		t.Fatal(err)
	}
	_, found := units["hello.service"]
	if len(units) != 1 || !found {
		t.Fatalf("Expected hello.service to be sole active unit, got %v", units)
	}
}
func TestScheduleGlobalUnits(t *testing.T) {
	// Create a three-member cluster
	cluster, err := platform.NewNspawnCluster("smoke")
	if err != nil {
		t.Fatal(err)
	}
	defer cluster.Destroy(t)
	members, err := platform.CreateNClusterMembers(cluster, 3)
	if err != nil {
		t.Fatal(err)
	}
	m0 := members[0]
	machines, err := cluster.WaitForNMachines(m0, 3)
	if err != nil {
		t.Fatal(err)
	}

	// Launch a couple of simple units
	stdout, stderr, err := cluster.Fleetctl(m0, "start", "--no-block", "fixtures/units/hello.service", "fixtures/units/goodbye.service")
	if err != nil {
		t.Fatalf("Failed starting units: \nstdout: %s\nstderr: %s\nerr: %v", stdout, stderr, err)
	}

	// Both units should show up active
	_, err = cluster.WaitForNActiveUnits(m0, 2)
	if err != nil {
		t.Fatal(err)
	}

	// Now add a global unit
	stdout, stderr, err = cluster.Fleetctl(m0, "start", "--no-block", "fixtures/units/global.service")
	if err != nil {
		t.Fatalf("Failed starting unit: \nstdout: %s\nstderr: %s\nerr: %v", stdout, stderr, err)
	}

	// Should see 2 + 3 units
	states, err := cluster.WaitForNActiveUnits(m0, 5)
	if err != nil {
		t.Fatal(err)
	}

	// Each machine should have a single global unit
	us := states["global.service"]
	for _, mach := range machines {
		var found bool
		for _, state := range us {
			if state.Machine == mach {
				found = true
				break
			}
		}
		if !found {
			// Log the states we did find before failing, since t.Fatalf
			// stops the test immediately.
			t.Logf("Found unit states:")
			for _, state := range states {
				t.Logf("%#v", state)
			}
			t.Fatalf("Did not find global unit on machine %v", mach)
		}
	}
}
// Ensure units can be scheduled directly to a given machine using the // MachineID unit option. func TestScheduleConditionMachineID(t *testing.T) { cluster, err := platform.NewNspawnCluster("smoke") if err != nil { t.Fatal(err) } defer cluster.Destroy(t) // Start with a simple three-node cluster members, err := platform.CreateNClusterMembers(cluster, 3) if err != nil { t.Fatal(err) } m0 := members[0] machines, err := cluster.WaitForNMachines(m0, 3) if err != nil { t.Fatal(err) } // Start 3 units that are each scheduled to one of our machines schedule := make(map[string]string) for _, machine := range machines { contents := ` [Service] ExecStart=/bin/bash -c "while true; do echo Hello, World!; sleep 1; done" [X-Fleet] MachineID=%s ` unitFile, err := util.TempUnit(fmt.Sprintf(contents, machine)) if err != nil { t.Fatalf("Failed creating temporary unit: %v", err) } defer os.Remove(unitFile) stdout, stderr, err := cluster.Fleetctl(m0, "start", unitFile) if err != nil { t.Fatalf("Failed starting unit file %s: \nstdout: %s\nstderr: %s\nerr: %v", unitFile, stdout, stderr, err) } unit := filepath.Base(unitFile) schedule[unit] = machine } // Block until our three units have been started active, err := cluster.WaitForNActiveUnits(m0, 3) if err != nil { t.Fatal(err) } states, err := util.ActiveToSingleStates(active) if err != nil { t.Fatal(err) } for unit, unitState := range states { if unitState.Machine != schedule[unit] { t.Errorf("Unit %s was scheduled to %s, expected %s", unit, unitState.Machine, schedule[unit]) } } }
func TestUnitSubmit(t *testing.T) { cluster, err := platform.NewNspawnCluster("smoke") if err != nil { t.Fatal(err) } defer cluster.Destroy() if err := platform.CreateNClusterMembers(cluster, 1, platform.MachineConfig{}); err != nil { t.Fatal(err) } _, err = cluster.WaitForNMachines(1) if err != nil { t.Fatal(err) } // submit a unit and assert it shows up if _, _, err := cluster.Fleetctl("submit", "fixtures/units/hello.service"); err != nil { t.Fatalf("Unable to submit fleet unit: %v", err) } stdout, _, err := cluster.Fleetctl("list-units", "--no-legend") if err != nil { t.Fatalf("Failed to run list-units: %v", err) } units := strings.Split(strings.TrimSpace(stdout), "\n") if len(units) != 1 { t.Fatalf("Did not find 1 unit in cluster: \n%s", stdout) } // submitting the same unit should not fail if _, _, err = cluster.Fleetctl("submit", "fixtures/units/hello.service"); err != nil { t.Fatalf("Expected no failure when double-submitting unit, got this: %v", err) } // destroy the unit and ensure it disappears from the unit list if _, _, err := cluster.Fleetctl("destroy", "fixtures/units/hello.service"); err != nil { t.Fatalf("Failed to destroy unit: %v", err) } stdout, _, err = cluster.Fleetctl("list-units", "--no-legend") if err != nil { t.Fatalf("Failed to run list-units: %v", err) } if strings.TrimSpace(stdout) != "" { t.Fatalf("Did not find 0 units in cluster: \n%s", stdout) } // submitting the unit after destruction should succeed if _, _, err := cluster.Fleetctl("submit", "fixtures/units/hello.service"); err != nil { t.Fatalf("Unable to submit fleet unit: %v", err) } stdout, _, err = cluster.Fleetctl("list-units", "--no-legend") if err != nil { t.Fatalf("Failed to run list-units: %v", err) } units = strings.Split(strings.TrimSpace(stdout), "\n") if len(units) != 1 { t.Fatalf("Did not find 1 unit in cluster: \n%s", stdout) } }
func TestKnownHostsVerification(t *testing.T) { cluster, err := platform.NewNspawnCluster("smoke") if err != nil { t.Fatal(err) } defer cluster.Destroy() members, err := platform.CreateNClusterMembers(cluster, 2) if err != nil { t.Fatal(err) } m0 := members[0] machines, err := cluster.WaitForNMachines(m0, 2) if err != nil { t.Fatal(err) } machine := machines[0] tmp, err := ioutil.TempFile(os.TempDir(), "known-hosts") if err != nil { t.Fatal(err) } tmp.Close() defer syscall.Unlink(tmp.Name()) khFile := tmp.Name() if stdout, stderr, err := cluster.FleetctlWithInput(m0, "yes", "--strict-host-key-checking=true", fmt.Sprintf("--known-hosts-file=%s", khFile), "ssh", machine, "uptime"); err != nil { t.Errorf("Unable to SSH into fleet machine: \nstdout: %s\nstderr: %s\nerr: %v", stdout, stderr, err) } _, err = cluster.ReplaceMember(members[1]) if err != nil { t.Fatalf("Failed replacing machine: %v", err) } machines, err = cluster.WaitForNMachines(m0, 2) if err != nil { t.Fatal(err) } machine = machines[0] // SSH'ing to the cluster member should now fail with a host key mismatch if _, _, err := cluster.Fleetctl(m0, "--strict-host-key-checking=true", fmt.Sprintf("--known-hosts-file=%s", khFile), "ssh", machine, "uptime"); err == nil { t.Errorf("Expected error while SSH'ing to fleet machine") } // Overwrite the known-hosts file to simulate removing the old host key if err := ioutil.WriteFile(khFile, []byte{}, os.FileMode(0644)); err != nil { t.Fatalf("Unable to overwrite known-hosts file: %v", err) } // And SSH should work again if stdout, stderr, err := cluster.FleetctlWithInput(m0, "yes", "--strict-host-key-checking=true", fmt.Sprintf("--known-hosts-file=%s", khFile), "ssh", machine, "uptime"); err != nil { t.Errorf("Unable to SSH into fleet machine: \nstdout: %s\nstderr: %s\nerr: %v", stdout, stderr, err) } }
func TestUnitSSHActions(t *testing.T) { cluster, err := platform.NewNspawnCluster("smoke") if err != nil { t.Fatal(err) } defer cluster.Destroy() if err := platform.CreateNClusterMembers(cluster, 1, platform.MachineConfig{}); err != nil { t.Fatal(err) } _, err = cluster.WaitForNMachines(1) if err != nil { t.Fatal(err) } if _, _, err := cluster.Fleetctl("start", "--no-block", "fixtures/units/hello.service"); err != nil { t.Fatalf("Unable to start fleet unit: %v", err) } units, err := cluster.WaitForNActiveUnits(1) if err != nil { t.Fatal(err) } _, found := units["hello.service"] if len(units) != 1 || !found { t.Fatalf("Expected hello.service to be sole active unit, got %v", units) } stdout, _, err := cluster.Fleetctl("--strict-host-key-checking=false", "ssh", "hello.service", "echo", "foo") if err != nil { t.Errorf("Failure occurred while calling fleetctl ssh: %v", err) } if !strings.Contains(stdout, "foo") { t.Errorf("Could not find expected string in command output:\n%s", stdout) } stdout, _, err = cluster.Fleetctl("--strict-host-key-checking=false", "status", "hello.service") if err != nil { t.Errorf("Failure occurred while calling fleetctl status: %v", err) } if !strings.Contains(stdout, "Active: active") { t.Errorf("Could not find expected string in status output:\n%s", stdout) } stdout, _, err = cluster.Fleetctl("--strict-host-key-checking=false", "journal", "hello.service") if err != nil { t.Errorf("Failure occurred while calling fleetctl journal: %v", err) } if !strings.Contains(stdout, "Hello, World!") { t.Errorf("Could not find expected string in journal output:\n%s", stdout) } }
func TestScheduleOneWayConflict(t *testing.T) { cluster, err := platform.NewNspawnCluster("smoke") if err != nil { t.Fatal(err) } defer cluster.Destroy() // Start with a simple three-node cluster if err := platform.CreateNClusterMembers(cluster, 1, platform.MachineConfig{}); err != nil { t.Fatal(err) } if _, err := cluster.WaitForNMachines(1); err != nil { t.Fatal(err) } // Start a unit that conflicts with a yet-to-be-scheduled unit name := "fixtures/units/conflicts-with-hello.service" if _, _, err := cluster.Fleetctl("start", name); err != nil { t.Fatalf("Failed starting unit %s: %v", name, err) } // Start a unit that has not defined conflicts name = "fixtures/units/hello.service" if _, _, err := cluster.Fleetctl("start", name); err == nil { t.Fatalf("Unit %s unexpectedly started", name) } // Both units should show up, but only conflicts-with-hello.service // should report ACTIVE stdout, _, err := cluster.Fleetctl("list-units", "--no-legend") if err != nil { t.Fatalf("Failed to run list-units: %v", err) } units := strings.Split(strings.TrimSpace(stdout), "\n") if len(units) != 2 { t.Fatalf("Did not find two units in cluster: \n%s", stdout) } states, err := cluster.WaitForNActiveUnits(1) if err != nil { t.Fatal(err) } for unit, _ := range states { if unit != "conflicts-with-hello.service" { t.Error("Incorrect unit started:", unit) } } }
func TestScheduleOneWayConflict(t *testing.T) {
	cluster, err := platform.NewNspawnCluster("smoke")
	if err != nil {
		t.Fatal(err)
	}
	defer cluster.Destroy(t)

	// Start with a simple three-node cluster
	members, err := platform.CreateNClusterMembers(cluster, 1)
	if err != nil {
		t.Fatal(err)
	}
	m0 := members[0]
	if _, err := cluster.WaitForNMachines(m0, 1); err != nil {
		t.Fatal(err)
	}

	// Start a unit that conflicts with a yet-to-be-scheduled unit
	name := "fixtures/units/conflicts-with-hello.service"
	if stdout, stderr, err := cluster.Fleetctl(m0, "start", "--no-block", name); err != nil {
		t.Fatalf("Failed starting unit %s: \nstdout: %s\nstderr: %s\nerr: %v", name, stdout, stderr, err)
	}

	active, err := cluster.WaitForNActiveUnits(m0, 1)
	if err != nil {
		t.Fatal(err)
	}
	states, err := util.ActiveToSingleStates(active)
	if err != nil {
		t.Fatal(err)
	}

	// Start a unit that has not defined conflicts
	name = "fixtures/units/hello.service"
	if stdout, stderr, err := cluster.Fleetctl(m0, "start", "--no-block", name); err != nil {
		t.Fatalf("Failed starting unit %s: \nstdout: %s\nstderr: %s\nerr: %v", name, stdout, stderr, err)
	}

	// Both units should show up, but only conflicts-with-hello.service
	// should report ACTIVE
	stdout, _, err := cluster.Fleetctl(m0, "list-unit-files", "--no-legend")
	if err != nil {
		t.Fatalf("Failed to run list-unit-files: %v", err)
	}
	units := strings.Split(strings.TrimSpace(stdout), "\n")
	if len(units) != 2 {
		t.Fatalf("Did not find two units in cluster: \n%s", stdout)
	}
	active, err = cluster.WaitForNActiveUnits(m0, 1)
	if err != nil {
		t.Fatal(err)
	}
	states, err = util.ActiveToSingleStates(active)
	if err != nil {
		t.Fatal(err)
	}
	for unit := range states {
		if unit != "conflicts-with-hello.service" {
			t.Error("Incorrect unit started:", unit)
		}
	}

	// Destroying the conflicting unit should allow the other to start
	name = "conflicts-with-hello.service"
	if _, _, err := cluster.Fleetctl(m0, "destroy", name); err != nil {
		t.Fatalf("Failed destroying %s", name)
	}

	// NOTE: we need to sleep briefly here to avoid an occasional error in which
	// conflicts-with-hello.service is rescheduled even after being destroyed.
	// In that case, the conflicting unit remains active while the original
	// hello.service stays inactive, and TestScheduleOneWayConflict then fails
	// at the end with the message "Incorrect unit started". This error seems
	// to occur frequently when enable_grpc is turned on. - dpark 20160615
	time.Sleep(1 * time.Second)

	// Wait for the destroyed unit to actually disappear
	timeout, err := util.WaitForState(
		func() bool {
			stdout, _, err := cluster.Fleetctl(m0, "list-units", "--no-legend", "--full", "--fields", "unit,active,machine")
			if err != nil {
				return false
			}
			lines := strings.Split(strings.TrimSpace(stdout), "\n")
			states := util.ParseUnitStates(lines)
			for _, state := range states {
				if state.Name == name {
					return false
				}
			}
			return true
		},
	)
	if err != nil {
		t.Fatalf("Destroyed unit %s not gone within %v", name, timeout)
	}

	active, err = cluster.WaitForNActiveUnits(m0, 1)
	if err != nil {
		t.Fatal(err)
	}
	states, err = util.ActiveToSingleStates(active)
	if err != nil {
		t.Fatal(err)
	}
	for unit := range states {
		if unit != "hello.service" {
			t.Error("Incorrect unit started:", unit)
		}
	}
}
// Start three machines and test template units based on machine metadata
func TestTemplatesWithSpecifiersInMetadata(t *testing.T) {
	cluster, err := platform.NewNspawnCluster("smoke")
	if err != nil {
		t.Fatal(err)
	}
	defer cluster.Destroy(t)
	members, err := platform.CreateNClusterMembers(cluster, 3)
	if err != nil {
		t.Fatal(err)
	}
	m0 := members[0]
	_, err = cluster.WaitForNMachines(m0, 3)
	if err != nil {
		t.Fatal(err)
	}

	// Submit one template
	if stdout, stderr, err := cluster.Fleetctl(m0, "submit", "fixtures/units/[email protected]"); err != nil {
		t.Fatalf("Unable to submit [email protected] template: \nstdout: %s\nstderr: %s\nerr: %v", stdout, stderr, err)
	}

	// Start units based on the template, in reverse order
	for i := len(members) - 1; i >= 0; i-- {
		if stdout, stderr, err := cluster.Fleetctl(m0, "start", fmt.Sprintf("fixtures/units/metadata@smoke%s.service", members[i].ID())); err != nil {
			t.Fatalf("Unable to start template based unit: \nstdout: %s\nstderr: %s\nerr: %v", stdout, stderr, err)
		}
	}

	_, err = cluster.WaitForNActiveUnits(m0, 3)
	if err != nil {
		t.Fatal(err)
	}

	stdout, stderr, err := cluster.Fleetctl(m0, "list-units", "--no-legend", "--full", "--fields", "unit,active,machine")
	if err != nil {
		t.Fatalf("Unable to get submitted units: \nstdout: %s\nstderr: %s\nerr: %v", stdout, stderr, err)
	}

	ndesired := 3
	stdout = strings.TrimSpace(stdout)
	lines := strings.Split(stdout, "\n")
	allStates := util.ParseUnitStates(lines)
	active := util.FilterActiveUnits(allStates)
	nactive := len(active)
	if nactive != ndesired {
		t.Fatalf("Failed to get %d active units: \nstdout: %s\nstderr: %s", ndesired, stdout, stderr)
	}

	// Each instance name encodes the machine it should have landed on
	// (e.g. metadata@smokeXYZ.service must run on machine smokeXYZ).
	re := regexp.MustCompile(`@([^.]*)`)
	for _, state := range active {
		desiredMachine := re.FindStringSubmatch(state.Name)
		if len(desiredMachine) < 2 {
			t.Fatalf("Cannot parse state.Name (%v): \nstdout: %s\nstderr: %s", state.Name, stdout, stderr)
		}
		currentMachine := fmt.Sprintf("smoke%s", state.Machine)
		if desiredMachine[1] != currentMachine {
			t.Fatalf("Template (%s) has been scheduled on wrong machine (%s): \nstdout: %s\nstderr: %s", state.Name, currentMachine, stdout, stderr)
		}
	}

	if stdout, stderr, err := cluster.Fleetctl(m0, "start", "--block-attempts=20", "fixtures/units/[email protected]"); err == nil {
		t.Fatalf("metadata@invalid unit should not be scheduled: \nstdout: %s\nstderr: %s", stdout, stderr)
	}
}
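// The [email protected] fixture is assumed to pin each instance to the machine
// whose metadata matches the instance name, via fleet's MachineMetadata option
// and a unit specifier. A minimal sketch of what such a template might contain
// (hypothetical contents, not copied from fixtures/units/):
//
//	[Service]
//	ExecStart=/bin/bash -c "while true; do echo metadata; sleep 1; done"
//
//	[X-Fleet]
//	MachineMetadata=hostname=%i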
// Ensure an existing unit migrates to an unoccupied machine // if its host goes down. func TestDynamicClusterNewMemberUnitMigration(t *testing.T) { cluster, err := platform.NewNspawnCluster("smoke") if err != nil { t.Fatal(err) } defer cluster.Destroy() // Start with a 4-node cluster members, err := platform.CreateNClusterMembers(cluster, 4) if err != nil { t.Fatal(err) } m0 := members[0] if _, err = cluster.WaitForNMachines(m0, 4); err != nil { t.Fatal(err) } // Start 3 conflicting units on the 4-node cluster _, _, err = cluster.Fleetctl(m0, "start", "fixtures/units/conflict.0.service", "fixtures/units/conflict.1.service", "fixtures/units/conflict.2.service", ) if err != nil { t.Errorf("Failed starting units: %v", err) } // All 3 services should be visible immediately, and all of them should // become ACTIVE shortly thereafter stdout, _, err := cluster.Fleetctl(m0, "list-units", "--no-legend") if err != nil { t.Fatalf("Failed to run list-units: %v", err) } units := strings.Split(strings.TrimSpace(stdout), "\n") if len(units) != 3 { t.Fatalf("Did not find 3 units in cluster: \n%s", stdout) } active, err := cluster.WaitForNActiveUnits(m0, 3) if err != nil { t.Fatal(err) } // Ensure each unit is only running on a single machine states, err := util.ActiveToSingleStates(active) if err != nil { t.Fatal(err) } // Kill one of the machines and make sure the unit migrates somewhere else unit := "conflict.1.service" oldMach := states[unit].Machine oldIP := states[unit].IP if _, _, err = cluster.Fleetctl(m0, "--strict-host-key-checking=false", "ssh", oldMach, "sudo", "systemctl", "stop", "fleet"); err != nil { t.Fatal(err) } var mN platform.Member if m0.IP() == oldIP { mN = members[1] } else { mN = m0 } if _, err = cluster.WaitForNMachines(mN, 3); err != nil { t.Fatal(err) } newActive, err := cluster.WaitForNActiveUnits(mN, 3) if err != nil { t.Fatal(err) } // Ensure each unit is only running on a single machine newStates, err := util.ActiveToSingleStates(newActive) if err != nil { t.Fatal(err) } newMach := newStates[unit].Machine if newMach == oldMach { t.Fatalf("Unit %s did not migrate from machine %s to %s", unit, oldMach, newMach) } // Ensure no other units migrated due to this churn if newMach == states["conflict.0.service"].Machine || newMach == states["conflict.2.service"].Machine { t.Errorf("Unit %s landed on occupied machine", unit) } if states["conflict.0.service"].Machine != newStates["conflict.0.service"].Machine || states["conflict.2.service"].Machine != newStates["conflict.2.service"].Machine { t.Errorf("Unit caused unnecessary churn in the cluster") } }
// Simulate rebooting a single member of a fleet cluster func TestDynamicClusterMemberReboot(t *testing.T) { cluster, err := platform.NewNspawnCluster("smoke") if err != nil { t.Fatal(err) } defer cluster.Destroy() // Start with a simple three-node cluster members, err := platform.CreateNClusterMembers(cluster, 3) if err != nil { t.Fatal(err) } m0 := members[0] if _, err = cluster.WaitForNMachines(m0, 3); err != nil { t.Fatal(err) } _, _, err = cluster.Fleetctl(m0, "start", "fixtures/units/conflict.0.service", "fixtures/units/conflict.1.service", "fixtures/units/conflict.2.service", ) if err != nil { t.Errorf("Failed starting units: %v", err) } // All 3 services should be visible immediately, and all of them should // become ACTIVE shortly thereafter stdout, _, err := cluster.Fleetctl(m0, "list-units", "--no-legend") if err != nil { t.Fatalf("Failed to run list-units: %v", err) } units := strings.Split(strings.TrimSpace(stdout), "\n") if len(units) != 3 { t.Fatalf("Did not find 3 units in cluster: \n%s", stdout) } oldActive, err := cluster.WaitForNActiveUnits(m0, 3) if err != nil { t.Fatal(err) } oldStates, err := util.ActiveToSingleStates(oldActive) if err != nil { t.Fatal(err) } // Simulate a reboot by recreating one of the cluster members if _, err := cluster.ReplaceMember(cluster.Members()[1]); err != nil { t.Fatalf("replace failed: %v", err) } newActive, err := cluster.WaitForNActiveUnits(m0, 3) if err != nil { t.Fatal(err) } newStates, err := util.ActiveToSingleStates(newActive) if err != nil { t.Fatal(err) } migrated := 0 for _, unit := range []string{"conflict.0.service", "conflict.1.service", "conflict.2.service"} { if oldStates[unit].Machine != newStates[unit].Machine { migrated += 1 } } if migrated != 1 { t.Errorf("Expected 1 unit to migrate, but found %d", migrated) t.Logf("Initial state: %#v", oldStates) t.Logf("Post-reboot state: %#v", newStates) } }
func TestScheduleOneWayConflict(t *testing.T) { cluster, err := platform.NewNspawnCluster("smoke") if err != nil { t.Fatal(err) } defer cluster.Destroy() // Start with a simple three-node cluster members, err := platform.CreateNClusterMembers(cluster, 1) if err != nil { t.Fatal(err) } m0 := members[0] if _, err := cluster.WaitForNMachines(m0, 1); err != nil { t.Fatal(err) } // Start a unit that conflicts with a yet-to-be-scheduled unit name := "fixtures/units/conflicts-with-hello.service" if stdout, stderr, err := cluster.Fleetctl(m0, "start", "--no-block", name); err != nil { t.Fatalf("Failed starting unit %s: \nstdout: %s\nstderr: %s\nerr: %v", name, stdout, stderr, err) } active, err := cluster.WaitForNActiveUnits(m0, 1) if err != nil { t.Fatal(err) } states, err := util.ActiveToSingleStates(active) if err != nil { t.Fatal(err) } // Start a unit that has not defined conflicts name = "fixtures/units/hello.service" if stdout, stderr, err := cluster.Fleetctl(m0, "start", "--no-block", name); err != nil { t.Fatalf("Failed starting unit %s: \nstdout: %s\nstderr: %s\nerr: %v", name, stdout, stderr, err) } // Both units should show up, but only conflicts-with-hello.service // should report ACTIVE stdout, _, err := cluster.Fleetctl(m0, "list-unit-files", "--no-legend") if err != nil { t.Fatalf("Failed to run list-unit-files: %v", err) } units := strings.Split(strings.TrimSpace(stdout), "\n") if len(units) != 2 { t.Fatalf("Did not find two units in cluster: \n%s", stdout) } active, err = cluster.WaitForNActiveUnits(m0, 1) if err != nil { t.Fatal(err) } states, err = util.ActiveToSingleStates(active) if err != nil { t.Fatal(err) } for unit := range states { if unit != "conflicts-with-hello.service" { t.Error("Incorrect unit started:", unit) } } // Destroying the conflicting unit should allow the other to start name = "conflicts-with-hello.service" if _, _, err := cluster.Fleetctl(m0, "destroy", name); err != nil { t.Fatalf("Failed destroying %s", name) } // TODO(jonboulle): fix this race. Since we no longer immediately // remove unit state on unit destruction (and instead wait for // UnitStateGenerator/UnitStatePublisher to clean up), the old unit // shows up as active for quite some time. time.Sleep(5 * time.Second) stdout, _, err = cluster.Fleetctl(m0, "list-units", "--no-legend") if err != nil { t.Fatalf("Failed to run list-units: %v", err) } units = strings.Split(strings.TrimSpace(stdout), "\n") if len(units) != 1 { t.Fatalf("Did not find one unit in cluster: \n%s", stdout) } active, err = cluster.WaitForNActiveUnits(m0, 1) if err != nil { t.Fatal(err) } states, err = util.ActiveToSingleStates(active) if err != nil { t.Fatal(err) } for unit := range states { if unit != "hello.service" { t.Error("Incorrect unit started:", unit) } } }
// Simulate rebooting a single member of a fleet cluster func TestDynamicClusterMemberReboot(t *testing.T) { cluster, err := platform.NewNspawnCluster("smoke") if err != nil { t.Fatal(err) } defer cluster.Destroy() // Start with a simple three-node cluster if err := platform.CreateNClusterMembers(cluster, 3, platform.MachineConfig{}); err != nil { t.Fatal(err) } if _, err = cluster.WaitForNMachines(3); err != nil { t.Fatal(err) } if _, _, err := cluster.Fleetctl("start", "fixtures/units/conflict.0.service", "fixtures/units/conflict.1.service", "fixtures/units/conflict.2.service", ); err != nil { t.Errorf("Failed starting units: %v", err) } // All 3 services should be visible immediately, and all of them should // become ACTIVE shortly thereafter stdout, _, err := cluster.Fleetctl("list-units", "--no-legend") if err != nil { t.Fatalf("Failed to run list-units: %v", err) } units := strings.Split(strings.TrimSpace(stdout), "\n") if len(units) != 3 { t.Fatalf("Did not find 3 units in cluster: \n%s", stdout) } oldStates, err := cluster.WaitForNActiveUnits(3) if err != nil { t.Fatal(err) } // Simulate a reboot by recreating one of the cluster members member := cluster.Members()[1] if _, err = cluster.MemberCommand(member, "sudo", "systemctl", "stop", "fleet"); err != nil { t.Fatal(err) } if err = cluster.DestroyMember(member); err != nil { t.Fatal(err) } if _, err = cluster.WaitForNMachines(2); err != nil { t.Fatal(err) } if _, err = cluster.WaitForNActiveUnits(2); err != nil { t.Fatal(err) } if err = cluster.CreateMember(member, platform.MachineConfig{}); err != nil { t.Fatal(err) } if _, err = cluster.WaitForNMachines(3); err != nil { t.Fatal(err) } newStates, err := cluster.WaitForNActiveUnits(3) if err != nil { t.Fatal(err) } for _, unit := range []string{"conflict.0.service", "conflict.1.service", "conflict.2.service"} { if oldStates[unit].Machine != newStates[unit].Machine { t.Fatalf("Unit %s migrated unexpectedly", unit) } } }
// TestScheduleReplace starts 1 unit, followed by starting another unit // that replaces the 1st unit. Then it verifies that the 2 units are // started on different machines. func TestScheduleReplace(t *testing.T) { cluster, err := platform.NewNspawnCluster("smoke") if err != nil { t.Fatal(err) } defer cluster.Destroy(t) members, err := platform.CreateNClusterMembers(cluster, 2) if err != nil { t.Fatal(err) } m0 := members[0] if _, err := cluster.WaitForNMachines(m0, 2); err != nil { t.Fatal(err) } // Start a unit without Replaces uNames := []string{ "fixtures/units/replace.0.service", "fixtures/units/replace.1.service", } if stdout, stderr, err := cluster.Fleetctl(m0, "start", "--no-block", uNames[0]); err != nil { t.Fatalf("Failed starting unit %s: \nstdout: %s\nstderr: %s\nerr: %v", uNames[0], stdout, stderr, err) } active, err := cluster.WaitForNActiveUnits(m0, 1) if err != nil { t.Fatal(err) } _, err = util.ActiveToSingleStates(active) if err != nil { t.Fatal(err) } // Start a unit that replaces the former one, replace.0.service if stdout, stderr, err := cluster.Fleetctl(m0, "start", "--no-block", uNames[1]); err != nil { t.Fatalf("Failed starting unit %s: \nstdout: %s\nstderr: %s\nerr: %v", uNames[1], stdout, stderr, err) } // Check that both units should show up stdout, stderr, err := cluster.Fleetctl(m0, "list-unit-files", "--no-legend") if err != nil { t.Fatalf("Failed to run list-unit-files:\nstdout: %s\nstderr: %s\nerr: %v", stdout, stderr, err) } units := strings.Split(strings.TrimSpace(stdout), "\n") if len(units) != 2 { t.Fatalf("Did not find two units in cluster: \n%s", stdout) } active, err = cluster.WaitForNActiveUnits(m0, 2) if err != nil { t.Fatal(err) } states, err := util.ActiveToSingleStates(active) if err != nil { t.Fatal(err) } // Check that the unit 1 is located on a different machine from that of unit 0 nUnits := 2 uNameBase := make([]string, nUnits) machs := make([]string, nUnits) for i, uName := range uNames { uNameBase[i] = path.Base(uName) machs[i] = states[uNameBase[i]].Machine } if machs[0] == machs[1] { t.Fatalf("machine for %s is %s, the same as that of %s.", uNameBase[0], machs[0], uNameBase[1]) } }
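// The replace.N.service fixtures used by the TestScheduleReplace variants are
// assumed to express the replacement relationship via fleet's Replaces option
// (the call to util.GenNewFleetService below rewrites exactly that line).
// A minimal sketch of what replace.1.service might contain (hypothetical
// contents, not copied from fixtures/units/):
//
//	[Service]
//	ExecStart=/bin/bash -c "while true; do echo replace; sleep 1; done"
//
//	[X-Fleet]
//	Replaces=replace.0.service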
func TestScheduleOneWayConflict(t *testing.T) { cluster, err := platform.NewNspawnCluster("smoke") if err != nil { t.Fatal(err) } defer cluster.Destroy(t) // Start with a simple three-node cluster members, err := platform.CreateNClusterMembers(cluster, 1) if err != nil { t.Fatal(err) } m0 := members[0] if _, err := cluster.WaitForNMachines(m0, 1); err != nil { t.Fatal(err) } // Start a unit that conflicts with a yet-to-be-scheduled unit name := "fixtures/units/conflicts-with-hello.service" if stdout, stderr, err := cluster.Fleetctl(m0, "start", "--no-block", name); err != nil { t.Fatalf("Failed starting unit %s: \nstdout: %s\nstderr: %s\nerr: %v", name, stdout, stderr, err) } active, err := cluster.WaitForNActiveUnits(m0, 1) if err != nil { t.Fatal(err) } states, err := util.ActiveToSingleStates(active) if err != nil { t.Fatal(err) } // Start a unit that has not defined conflicts name = "fixtures/units/hello.service" if stdout, stderr, err := cluster.Fleetctl(m0, "start", "--no-block", name); err != nil { t.Fatalf("Failed starting unit %s: \nstdout: %s\nstderr: %s\nerr: %v", name, stdout, stderr, err) } // Both units should show up, but only conflicts-with-hello.service // should report ACTIVE stdout, _, err := cluster.Fleetctl(m0, "list-unit-files", "--no-legend") if err != nil { t.Fatalf("Failed to run list-unit-files: %v", err) } units := strings.Split(strings.TrimSpace(stdout), "\n") if len(units) != 2 { t.Fatalf("Did not find two units in cluster: \n%s", stdout) } active, err = cluster.WaitForNActiveUnits(m0, 1) if err != nil { t.Fatal(err) } states, err = util.ActiveToSingleStates(active) if err != nil { t.Fatal(err) } for unit := range states { if unit != "conflicts-with-hello.service" { t.Error("Incorrect unit started:", unit) } } // Destroying the conflicting unit should allow the other to start name = "conflicts-with-hello.service" if _, _, err := cluster.Fleetctl(m0, "destroy", name); err != nil { t.Fatalf("Failed destroying %s", name) } // Wait for the destroyed unit to actually disappear timeout, err := util.WaitForState( func() bool { stdout, _, err := cluster.Fleetctl(m0, "list-units", "--no-legend", "--full", "--fields", "unit,active,machine") if err != nil { return false } lines := strings.Split(strings.TrimSpace(stdout), "\n") states := util.ParseUnitStates(lines) for _, state := range states { if state.Name == name { return false } } return true }, ) if err != nil { t.Fatalf("Destroyed unit %s not gone within %v", name, timeout) } active, err = cluster.WaitForNActiveUnits(m0, 1) if err != nil { t.Fatal(err) } states, err = util.ActiveToSingleStates(active) if err != nil { t.Fatal(err) } for unit := range states { if unit != "hello.service" { t.Error("Incorrect unit started:", unit) } } }
func TestScheduleGlobalUnits(t *testing.T) {
	// Create a three-member cluster
	cluster, err := platform.NewNspawnCluster("smoke")
	if err != nil {
		t.Fatal(err)
	}
	defer cluster.Destroy(t)
	numGUnits := 3
	numAllUnits := 5
	members, err := platform.CreateNClusterMembers(cluster, numGUnits)
	if err != nil {
		t.Fatal(err)
	}
	m0 := members[0]
	machines, err := cluster.WaitForNMachines(m0, numGUnits)
	if err != nil {
		t.Fatal(err)
	}

	// Launch a couple of simple units
	stdout, stderr, err := cluster.Fleetctl(m0, "start", "--no-block", "fixtures/units/hello.service", "fixtures/units/goodbye.service")
	if err != nil {
		t.Fatalf("Failed starting units: \nstdout: %s\nstderr: %s\nerr: %v", stdout, stderr, err)
	}

	// Both units should show up active
	_, err = cluster.WaitForNActiveUnits(m0, 2)
	if err != nil {
		t.Fatal(err)
	}

	// Now add a global unit
	globalLongPath := "fixtures/units/global.service"
	stdout, stderr, err = cluster.Fleetctl(m0, "start", "--no-block", globalLongPath)
	if err != nil {
		t.Fatalf("Failed starting unit: \nstdout: %s\nstderr: %s\nerr: %v", stdout, stderr, err)
	}

	// Should see 2 + 3 units
	states, err := cluster.WaitForNActiveUnits(m0, numAllUnits)
	if err != nil {
		t.Fatal(err)
	}

	// Each machine should have a single global unit
	globalBase := path.Base(globalLongPath)
	us := states[globalBase]
	for _, mach := range machines {
		var found bool
		for _, state := range us {
			if state.Machine == mach {
				found = true
				break
			}
		}
		if !found {
			// Log the states we did find before failing, since t.Fatalf
			// stops the test immediately.
			t.Logf("Found unit states:")
			for _, state := range states {
				t.Logf("%#v", state)
			}
			t.Fatalf("Did not find global unit on machine %v", mach)
		}
	}

	stdout, stderr, err = cluster.Fleetctl(m0, "status", "--no-block", globalBase)
	if err != nil {
		t.Fatalf("Failed getting unit status: \nstdout: %s\nstderr: %s\nerr: %v", stdout, stderr, err)
	}

	// restarting global units
	cmd := "restart"
	stdout, stderr, err = cluster.Fleetctl(m0, cmd, globalBase)
	if err != nil {
		t.Fatalf("Failed restarting global unit: \nstdout: %s\nstderr: %s\nerr: %v", stdout, stderr, err)
	}

	outmap, err := waitForNUnitsStart(cluster, m0, numAllUnits)
	if err != nil {
		t.Fatalf("Failed listing global units: %v", err)
	}
	glist := outmap[globalBase]
	if len(glist) != numGUnits {
		t.Fatalf("Did not find %d global units: got %d", numGUnits, len(glist))
	}
}
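// The global.service fixture is assumed to be marked as a global unit so that
// fleet schedules one instance on every machine in the cluster. A minimal
// sketch of such a unit (hypothetical contents, not copied from fixtures/units/):
//
//	[Service]
//	ExecStart=/bin/bash -c "while true; do echo global; sleep 1; done"
//
//	[X-Fleet]
//	Global=true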
// TestScheduleGlobalConflicts starts 2 global units that conflict with each
// other, and checks that only the first one is scheduled.
func TestScheduleGlobalConflicts(t *testing.T) {
	// Create a three-member cluster
	cluster, err := platform.NewNspawnCluster("smoke")
	if err != nil {
		t.Fatal(err)
	}
	defer cluster.Destroy(t)
	members, err := platform.CreateNClusterMembers(cluster, 3)
	if err != nil {
		t.Fatal(err)
	}
	m0 := members[0]
	machines, err := cluster.WaitForNMachines(m0, 3)
	if err != nil {
		t.Fatal(err)
	}

	cfGlobal0 := "fixtures/units/conflict-global.0.service"
	cfGlobal1 := "fixtures/units/conflict-global.1.service"

	// Launch a global unit
	stdout, stderr, err := cluster.Fleetctl(m0, "start", "--no-block", cfGlobal0)
	if err != nil {
		t.Fatalf("Failed starting units: \nstdout: %s\nstderr: %s\nerr: %v", stdout, stderr, err)
	}

	// the global unit should show up active on 3 machines
	_, err = cluster.WaitForNActiveUnits(m0, 3)
	if err != nil {
		t.Fatal(err)
	}

	// Now add another global unit, which actually should not be started.
	stdout, stderr, err = cluster.Fleetctl(m0, "start", "--no-block", cfGlobal1)
	if err != nil {
		t.Fatalf("Failed starting unit: \nstdout: %s\nstderr: %s\nerr: %v", stdout, stderr, err)
	}

	// Should see only 3 units
	states, err := cluster.WaitForNActiveUnits(m0, 3)
	if err != nil {
		t.Fatal(err)
	}

	// Each machine should have a single global unit conflict-global.0.service,
	// but not conflict-global.1.service.
	us0 := states[path.Base(cfGlobal0)]
	us1 := states[path.Base(cfGlobal1)]
	for _, mach := range machines {
		var found bool
		for _, state := range us0 {
			if state.Machine == mach {
				found = true
				break
			}
		}
		if !found {
			// Log the states we did find before failing, since t.Fatalf
			// stops the test immediately.
			t.Logf("Found unit states:")
			for _, state := range states {
				t.Logf("%#v", state)
			}
			t.Fatalf("Did not find global unit on machine %v", mach)
		}

		found = false
		for _, state := range us1 {
			if state.Machine == mach {
				found = true
				break
			}
		}
		if found {
			t.Fatalf("Found global unit %s on machine %v; global units were not conflicted as expected", path.Base(cfGlobal1), mach)
		}
	}
}
// Start a batch of units over an ssh tunnel
func TestTunnelScheduleBatchUnits(t *testing.T) {
	cluster, err := platform.NewNspawnCluster("smoke")
	if err != nil {
		t.Fatal(err)
	}
	defer cluster.Destroy(t)
	members, err := platform.CreateNClusterMembers(cluster, 3)
	if err != nil {
		t.Fatal(err)
	}
	m0 := members[0]
	_, err = cluster.WaitForNMachines(m0, 3)
	if err != nil {
		t.Fatal(err)
	}

	tmp, err := ioutil.TempFile(os.TempDir(), "known-hosts")
	if err != nil {
		t.Fatal(err)
	}
	tmp.Close()
	defer syscall.Unlink(tmp.Name())
	khFile := tmp.Name()

	// Launch one unit
	if stdout, stderr, err := cluster.FleetctlWithInput(m0, "yes", fmt.Sprintf("--tunnel=%s", m0.IP()), "--strict-host-key-checking=true", fmt.Sprintf("--known-hosts-file=%s", khFile), "start", "fixtures/units/hello.service"); err != nil {
		t.Fatalf("Unable to submit one unit using ssh tunnel: \nstdout: %s\nstderr: %s\nerr: %v", stdout, stderr, err)
	} else if strings.Contains(stderr, "Error") {
		t.Fatalf("Failed to correctly submit unit using ssh tunnel: \nstdout: %s\nstderr: %s\nerr: %v", stdout, stderr, err)
	}

	// Combine all parameters and units in one args slice
	args := []string{
		fmt.Sprintf("--tunnel=%s", m0.IP()),
		"--strict-host-key-checking=true",
		fmt.Sprintf("--known-hosts-file=%s", khFile),
		"start",
	}
	for i := 1; i <= 10; i++ {
		args = append(args, fmt.Sprintf("fixtures/units/hello@%d.service", i))
	}

	// Launch a batch of units
	if stdout, stderr, err := cluster.Fleetctl(m0, args...); err != nil {
		t.Fatalf("Unable to submit batch of units using ssh tunnel: \nstdout: %s\nstderr: %s\nerr: %v", stdout, stderr, err)
	} else if strings.Contains(stderr, "Error") {
		t.Fatalf("Failed to correctly submit batch of units using ssh tunnel: \nstdout: %s\nstderr: %s\nerr: %v", stdout, stderr, err)
	}

	_, err = cluster.WaitForNActiveUnits(m0, 11)
	if err != nil {
		t.Fatal(err)
	}
}
// TestDetectMachineId checks for etcd registration failing on a duplicated
// machine-id on different machines.
// First it creates a cluster with 2 members, m0 and m1. Then it makes their
// machine IDs identical by explicitly setting m1's ID to the same value as
// m0's. The test succeeds when an error is returned, and fails when nothing
// happens.
func TestDetectMachineId(t *testing.T) {
	cluster, err := platform.NewNspawnCluster("smoke")
	if err != nil {
		t.Fatal(err)
	}
	defer cluster.Destroy(t)

	members, err := platform.CreateNClusterMembers(cluster, 2)
	if err != nil {
		t.Fatal(err)
	}

	m0 := members[0]
	m1 := members[1]
	_, err = cluster.WaitForNMachines(m0, 2)
	if err != nil {
		t.Fatal(err)
	}

	machineIdFile := "/etc/machine-id"

	// Restart the fleet service, and check that its systemd status is still active.
	restartFleetService := func(m platform.Member) error {
		stdout, err := cluster.MemberCommand(m, "sudo", "systemctl", "restart", "fleet.service")
		if err != nil {
			return fmt.Errorf("Failed to restart fleet service\nstdout: %s\nerr: %v", stdout, err)
		}

		stdout, _ = cluster.MemberCommand(m, "systemctl", "show", "--property=ActiveState", "fleet")
		if strings.TrimSpace(stdout) != "ActiveState=active" {
			return fmt.Errorf("Fleet unit not reported as active: %s", stdout)
		}

		stdout, _ = cluster.MemberCommand(m, "systemctl", "show", "--property=Result", "fleet")
		if strings.TrimSpace(stdout) != "Result=success" {
			return fmt.Errorf("Result for fleet unit not reported as success: %s", stdout)
		}
		return nil
	}

	stdout, err := cluster.MemberCommand(m0, "cat", machineIdFile)
	if err != nil {
		t.Fatalf("Failed to get machine-id\nstdout: %s\nerr: %v", stdout, err)
	}
	m0MachineID := strings.TrimSpace(stdout)

	// Set m1's machine ID to the same value as m0's, to intentionally
	// trigger the error case of a duplicated machine ID.
	stdout, err = cluster.MemberCommand(m1, "echo", m0MachineID, "|", "sudo", "tee", machineIdFile)
	if err != nil {
		t.Fatalf("Failed to replace machine-id\nstdout: %s\nerr: %v", stdout, err)
	}

	if err := restartFleetService(m1); err != nil {
		t.Fatal(err)
	}

	// fleetd should actually be running, but failing to list machines.
	// So we should expect a specific error from fleetctl list-machines,
	// like "googleapi: Error 503: fleet server unable to communicate with etcd".
	stdout, stderr, err := cluster.Fleetctl(m1, "list-machines", "--no-legend")
	if err != nil {
		if !strings.Contains(err.Error(), "exit status 1") ||
			!strings.Contains(stderr, "fleet server unable to communicate with etcd") {
			t.Fatalf("m1: Failed to get list of machines. err: %v\nstderr: %s", err, stderr)
		}
		// If both conditions are satisfied, "exit status 1" and
		// "...unable to communicate...", then it's an expected error. PASS.
	} else {
		t.Fatalf("m1: should get an error, but got success.\nstderr: %s", stderr)
	}

	// Now give m0 a fresh machine ID so that its ID differs from m1's again.
	// After fleet.service has been restarted on both m0 and m1, both members
	// are expected to work properly with distinct machine IDs.
	stdout, err = cluster.MemberCommand(m0, "echo", util.NewMachineID(), "|", "sudo", "tee", machineIdFile)
	if err != nil {
		t.Fatalf("m0: Failed to replace machine-id\nstdout: %s\nerr: %v", stdout, err)
	}

	// Restart fleet service on m0, and see that it's still working.
	if err := restartFleetService(m0); err != nil {
		t.Fatal(err)
	}

	stdout, stderr, err = cluster.Fleetctl(m0, "list-machines", "--no-legend")
	if err != nil {
		t.Fatalf("m0: error: %v\nstdout: %s\nstderr: %s", err, stdout, stderr)
	}
}
// Start three pairs of services, asserting each pair land on the same // machine due to the MachineOf options in the unit files. func TestScheduleMachineOf(t *testing.T) { cluster, err := platform.NewNspawnCluster("smoke") if err != nil { t.Fatal(err) } defer cluster.Destroy(t) // Start with a simple three-node cluster members, err := platform.CreateNClusterMembers(cluster, 3) if err != nil { t.Fatal(err) } m0 := members[0] machines, err := cluster.WaitForNMachines(m0, 3) if err != nil { t.Fatal(err) } // Ensure we can SSH into each machine using fleetctl for _, machine := range machines { if stdout, stderr, err := cluster.Fleetctl(m0, "--strict-host-key-checking=false", "ssh", machine, "uptime"); err != nil { t.Errorf("Unable to SSH into fleet machine: \nstdout: %s\nstderr: %s\nerr: %v", stdout, stderr, err) } } // Start the 3 pairs of services for i := 0; i < 3; i++ { ping := fmt.Sprintf("fixtures/units/ping.%d.service", i) pong := fmt.Sprintf("fixtures/units/pong.%d.service", i) stdout, stderr, err := cluster.Fleetctl(m0, "start", "--no-block", ping, pong) if err != nil { t.Errorf("Failed starting units: \nstdout: %s\nstderr: %s\nerr: %v", stdout, stderr, err) } } // All 6 services should be visible immediately and become ACTIVE // shortly thereafter stdout, _, err := cluster.Fleetctl(m0, "list-unit-files", "--no-legend") if err != nil { t.Fatalf("Failed to run list-unit-files: %v", err) } units := strings.Split(strings.TrimSpace(stdout), "\n") if len(units) != 6 { t.Fatalf("Did not find six units in cluster: \n%s", stdout) } active, err := cluster.WaitForNActiveUnits(m0, 6) if err != nil { t.Fatal(err) } states, err := util.ActiveToSingleStates(active) if err != nil { t.Fatal(err) } for i := 0; i < 3; i++ { ping := fmt.Sprintf("ping.%d.service", i) pingState, ok := states[ping] if !ok { t.Errorf("Failed to find state for %s", ping) continue } pong := fmt.Sprintf("pong.%d.service", i) pongState, ok := states[pong] if !ok { t.Errorf("Failed to find state for %s", pong) continue } if len(pingState.Machine) == 0 { t.Errorf("Unit %s is not reporting machine", ping) } if len(pongState.Machine) == 0 { t.Errorf("Unit %s is not reporting machine", pong) } if pingState.Machine != pongState.Machine { t.Errorf("Units %s and %s are not on same machine", ping, pong) } } // Ensure a pair of units migrate together when their host goes down mach := states["ping.1.service"].Machine if _, _, err = cluster.Fleetctl(m0, "--strict-host-key-checking=false", "ssh", mach, "sudo", "systemctl", "stop", "fleet"); err != nil { t.Fatal(err) } var mN platform.Member if m0.ID() == states["ping.1.service"].Machine { mN = members[1] } else { mN = m0 } if _, err := cluster.WaitForNMachines(mN, 2); err != nil { t.Fatal(err) } active, err = cluster.WaitForNActiveUnits(mN, 6) if err != nil { t.Fatal(err) } states, err = util.ActiveToSingleStates(active) if err != nil { t.Fatal(err) } newPingMach := states["ping.1.service"].Machine if mach == newPingMach { t.Fatalf("Unit ping.1.service did not appear to migrate") } newPongMach := states["pong.1.service"].Machine if newPingMach != newPongMach { t.Errorf("Unit pong.1.service did not migrate with ping.1.service") } }
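// The ping/pong fixture pairs above are assumed to be tied together with
// fleet's MachineOf option, which is what forces each pair onto the same
// machine and makes them migrate together. A minimal sketch of what a pong
// unit might contain (hypothetical contents, not copied from fixtures/units/):
//
//	[Service]
//	ExecStart=/bin/bash -c "while true; do echo pong; sleep 1; done"
//
//	[X-Fleet]
//	MachineOf=ping.0.service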
// TestScheduleReplace starts 1 unit, followed by starting another unit
// that replaces the 1st unit. Then it verifies that the 2 units are
// started on different machines.
func TestScheduleReplace(t *testing.T) {
	cluster, err := platform.NewNspawnCluster("smoke")
	if err != nil {
		t.Fatal(err)
	}
	defer cluster.Destroy(t)

	// Start with a simple two-node cluster
	members, err := platform.CreateNClusterMembers(cluster, 2)
	if err != nil {
		t.Fatal(err)
	}
	m0 := members[0]
	if _, err := cluster.WaitForNMachines(m0, 2); err != nil {
		t.Fatal(err)
	}

	// Start a unit without Replaces
	uNames := []string{
		"fixtures/units/replace.0.service",
		"fixtures/units/replace.1.service",
	}
	if stdout, stderr, err := cluster.Fleetctl(m0, "start", "--no-block", uNames[0]); err != nil {
		t.Fatalf("Failed starting unit %s: \nstdout: %s\nstderr: %s\nerr: %v", uNames[0], stdout, stderr, err)
	}

	active, err := cluster.WaitForNActiveUnits(m0, 1)
	if err != nil {
		t.Fatal(err)
	}
	_, err = util.ActiveToSingleStates(active)
	if err != nil {
		t.Fatal(err)
	}

	// Start a unit that replaces the former one, replace.0.service
	if stdout, stderr, err := cluster.Fleetctl(m0, "start", "--no-block", uNames[1]); err != nil {
		t.Fatalf("Failed starting unit %s: \nstdout: %s\nstderr: %s\nerr: %v", uNames[1], stdout, stderr, err)
	}

	// Check that both units show up
	stdout, _, err := cluster.Fleetctl(m0, "list-unit-files", "--no-legend")
	if err != nil {
		t.Fatalf("Failed to run list-unit-files: %v", err)
	}
	units := strings.Split(strings.TrimSpace(stdout), "\n")
	if len(units) != 2 {
		t.Fatalf("Did not find two units in cluster: \n%s", stdout)
	}

	active, err = cluster.WaitForNActiveUnits(m0, 2)
	if err != nil {
		t.Fatal(err)
	}
	states, err := util.ActiveToSingleStates(active)
	if err != nil {
		t.Fatal(err)
	}

	// Check that unit 1 is located on a different machine from that of unit 0
	nUnits := 2
	uNameBase := make([]string, nUnits)
	machs := make([]string, nUnits)
	for i, uName := range uNames {
		uNameBase[i] = path.Base(uName)
		machs[i] = states[uNameBase[i]].Machine
	}
	if machs[0] == machs[1] {
		t.Fatalf("machine for %s is %s, the same as that of %s.", uNameBase[0], machs[0], uNameBase[1])
	}

	// Check that circular replaces end up with 1 launched unit.
	// First of all, stop the existing unit replace.0.service.
	if stdout, stderr, err := cluster.Fleetctl(m0, "destroy", uNameBase[0]); err != nil {
		t.Fatalf("Failed to destroy unit %s: \nstdout: %s\nstderr: %s\nerr: %v", uNameBase[0], stdout, stderr, err)
	}

	// Generate a new service 0 derived from a fixture, make the new service
	// replace service 1, and store it under /tmp.
	uName0tmp := path.Join("/tmp", uNameBase[0])
	err = util.GenNewFleetService(uName0tmp, uNames[1], "Replaces=replace.1.service", "Replaces=replace.0.service")
	if err != nil {
		t.Fatalf("Failed to generate a temp fleet service: %v", err)
	}

	// Start the replace.0 unit that replaces replace.1.service; fleetctl
	// list-unit-files should then show only 1 launched unit.
	// Note that we still need to run list-units once, before doing
	// list-unit-files, for reliable tests.
	stdout, stderr, err := cluster.Fleetctl(m0, "start", "--no-block", uName0tmp)
	if err != nil {
		t.Fatalf("Failed starting unit %s: \nstdout: %s\nstderr: %s\nerr: %v", uName0tmp, stdout, stderr, err)
	}

	stdout, _, err = cluster.Fleetctl(m0, "list-unit-files", "--no-legend")
	if err != nil {
		t.Fatalf("Failed to run list-unit-files: %v", err)
	}
	units = strings.Split(strings.TrimSpace(stdout), "\n")
	if len(units) != nUnits {
		t.Fatalf("Did not find %d units in cluster: \n%s", nUnits, stdout)
	}

	_, err = cluster.WaitForNActiveUnits(m0, nUnits)
	if err != nil {
		t.Fatal(err)
	}
	ufs, err := cluster.WaitForNUnitFiles(m0, nUnits)
	if err != nil {
		t.Fatalf("Failed to run list-unit-files: %v", err)
	}

	uStates := make([][]util.UnitFileState, nUnits)
	var found bool
	for i, unb := range uNameBase {
		uStates[i], found = ufs[unb]
		if len(ufs) != nUnits || !found {
			t.Fatalf("Did not find %d unit files as expected: got %d", nUnits, len(ufs))
		}
	}

	nLaunched := 0
	for _, us := range uStates {
		for _, state := range us {
			if strings.Contains(state.State, "launched") {
				nLaunched++
			}
		}
	}
	if nLaunched != 1 {
		t.Fatalf("Did not find 1 launched unit as expected: got %d", nLaunched)
	}

	os.Remove(uName0tmp)
}
// TestScheduleCircularReplace starts 2 units that try to replace each other.
// Thus it is expected that only one of the units becomes active.
func TestScheduleCircularReplace(t *testing.T) {
	cluster, err := platform.NewNspawnCluster("smoke")
	if err != nil {
		t.Fatal(err)
	}
	defer cluster.Destroy(t)

	members, err := platform.CreateNClusterMembers(cluster, 2)
	if err != nil {
		t.Fatal(err)
	}
	m0 := members[0]
	if _, err := cluster.WaitForNMachines(m0, 2); err != nil {
		t.Fatal(err)
	}

	// Check that circular replaces end up with 1 launched unit.
	// To do that, generate a new service 0 that replaces service 1, and store
	// it under /tmp. The original service 1 in turn replaces service 0.
	uNames := []string{
		"fixtures/units/replace.0.service",
		"fixtures/units/replace.1.service",
	}

	nUnits := 2
	nActiveUnits := 1
	uNameBase := make([]string, nUnits)
	for i, uName := range uNames {
		uNameBase[i] = path.Base(uName)
	}
	uName0tmp := path.Join("/tmp", uNameBase[0])
	err = util.GenNewFleetService(uName0tmp, uNames[1], "Replaces=replace.1.service", "Replaces=replace.0.service")
	if err != nil {
		t.Fatalf("Failed to generate a temp fleet service: %v", err)
	}

	// Start the replace.0 unit that replaces replace.1.service; fleetctl
	// list-unit-files should then show only 1 launched unit.
	stdout, stderr, err := cluster.Fleetctl(m0, "start", "--no-block", uName0tmp)
	if err != nil {
		t.Fatalf("Failed starting unit %s: \nstdout: %s\nstderr: %s\nerr: %v", uName0tmp, stdout, stderr, err)
	}
	stdout, stderr, err = cluster.Fleetctl(m0, "list-unit-files", "--no-legend")
	if err != nil {
		t.Fatalf("Failed to run list-unit-files:\nstdout: %s\nstderr: %s\nerr: %v", stdout, stderr, err)
	}
	units := strings.Split(strings.TrimSpace(stdout), "\n")
	if len(units) != nActiveUnits {
		t.Fatalf("Did not find %d unit in cluster: \n%s", nActiveUnits, stdout)
	}
	_, err = cluster.WaitForNActiveUnits(m0, nActiveUnits)
	if err != nil {
		t.Fatal(err)
	}
	ufs, err := cluster.WaitForNUnitFiles(m0, nActiveUnits)
	if err != nil {
		t.Fatalf("Failed to run list-unit-files: %v", err)
	}

	// Start the replace.1 unit that replaces replace.0.service,
	// and then check that only 1 unit is active
	if stdout, stderr, err := cluster.Fleetctl(m0, "start", "--no-block", uNames[1]); err != nil {
		t.Fatalf("Failed starting unit %s: \nstdout: %s\nstderr: %s\nerr: %v", uNames[1], stdout, stderr, err)
	}
	stdout, stderr, err = cluster.Fleetctl(m0, "list-unit-files", "--no-legend")
	if err != nil {
		t.Fatalf("Failed to run list-unit-files:\nstdout: %s\nstderr: %s\nerr: %v", stdout, stderr, err)
	}
	units = strings.Split(strings.TrimSpace(stdout), "\n")
	if len(units) != nUnits {
		t.Fatalf("Did not find %d units in cluster: \n%s", nUnits, stdout)
	}

	active, err := cluster.WaitForNActiveUnits(m0, nActiveUnits)
	if err != nil {
		t.Fatal(err)
	}
	_, err = util.ActiveToSingleStates(active)
	if err != nil {
		t.Fatal(err)
	}

	uStates := make([][]util.UnitFileState, nUnits)
	for i, unb := range uNameBase {
		uStates[i], _ = ufs[unb]
	}
	nLaunched := 0
	for _, us := range uStates {
		for _, state := range us {
			if strings.Contains(state.State, "launched") {
				nLaunched++
			}
		}
	}
	if nLaunched != nActiveUnits {
		t.Fatalf("Did not find %d launched unit as expected: got %d", nActiveUnits, nLaunched)
	}

	os.Remove(uName0tmp)
}
// TestScheduleReplace starts 3 units, followed by starting another unit
// that replaces the 1st unit. Then it verifies that the original unit
// got rescheduled on a different machine.
func TestScheduleReplace(t *testing.T) {
	cluster, err := platform.NewNspawnCluster("smoke")
	if err != nil {
		t.Fatal(err)
	}
	defer cluster.Destroy(t)

	members, err := platform.CreateNClusterMembers(cluster, 2)
	if err != nil {
		t.Fatal(err)
	}
	m0 := members[0]
	m1 := members[1]
	if _, err := cluster.WaitForNMachines(m0, 2); err != nil {
		t.Fatal(err)
	}

	// Start 3 units without Replaces: replace.0.service on m0, and both 1 and 2 on m1.
	// That's possible because replace.2.service has the option "MachineOf=replace.1.service".
	uNames := []string{
		"fixtures/units/replace.0.service",
		"fixtures/units/replace.1.service",
		"fixtures/units/replace.2.service",
		"fixtures/units/replace-kick0.service",
	}
	if stdout, stderr, err := cluster.Fleetctl(m0, "start", "--no-block", uNames[0]); err != nil {
		t.Fatalf("Failed starting unit %s: \nstdout: %s\nstderr: %s\nerr: %v", uNames[0], stdout, stderr, err)
	}
	if stdout, stderr, err := cluster.Fleetctl(m1, "start", "--no-block", uNames[1]); err != nil {
		t.Fatalf("Failed starting unit %s: \nstdout: %s\nstderr: %s\nerr: %v", uNames[1], stdout, stderr, err)
	}
	if stdout, stderr, err := cluster.Fleetctl(m1, "start", "--no-block", uNames[2]); err != nil {
		t.Fatalf("Failed starting unit %s: \nstdout: %s\nstderr: %s\nerr: %v", uNames[2], stdout, stderr, err)
	}

	active, err := cluster.WaitForNActiveUnits(m0, 3)
	if err != nil {
		t.Fatal(err)
	}
	states, err := util.ActiveToSingleStates(active)
	if err != nil {
		t.Fatal(err)
	}
	oldMach := states[path.Base(uNames[0])].Machine

	// Start a unit replace-kick0.service that replaces replace.0.service.
	// The kick0 unit will be scheduled to m0, as m0 is less loaded than m1.
	// That makes it possible to trigger the situation where kick0 kicks the
	// original unit 0 off its machine.
	if stdout, stderr, err := cluster.Fleetctl(m0, "start", "--no-block", uNames[3]); err != nil {
		t.Fatalf("Failed starting unit %s: \nstdout: %s\nstderr: %s\nerr: %v", uNames[3], stdout, stderr, err)
	}

	// Here we need to wait up to 15 seconds to avoid races, because the unit
	// state publisher could otherwise report unit states with old machine IDs
	// to the registry.
	checkReplacedMachines := func() bool {
		// Check that 4 units show up
		nUnits := 4
		stdout, stderr, err := cluster.Fleetctl(m0, "list-unit-files", "--no-legend")
		if err != nil {
			t.Logf("Failed to run list-unit-files:\nstdout: %s\nstderr: %s\nerr: %v", stdout, stderr, err)
			return false
		}
		units := strings.Split(strings.TrimSpace(stdout), "\n")
		if len(units) != nUnits {
			t.Logf("Did not find %d units in cluster: \n%s", nUnits, stdout)
			return false
		}
		active, err = cluster.WaitForNActiveUnits(m0, nUnits)
		if err != nil {
			t.Log(err)
			return false
		}
		states, err = util.ActiveToSingleStates(active)
		if err != nil {
			t.Log(err)
			return false
		}

		// Check that replace.0.service is located on a different machine from
		// that of replace-kick0.service.
		uNameBase := make([]string, nUnits)
		machs := make([]string, nUnits)
		for i, uName := range uNames {
			uNameBase[i] = path.Base(uName)
			machs[i] = states[uNameBase[i]].Machine
		}
		if machs[0] == machs[3] {
			t.Logf("machine for %s is %s, the same as that of %s.", uNameBase[0], machs[0], uNameBase[3])
			return false
		}
		if machs[3] != oldMach {
			t.Logf("machine for %s is %s, different from old machine %s.", uNameBase[3], machs[3], oldMach)
			return false
		}
		if machs[0] == oldMach {
			t.Logf("machine for %s is %s, the same as its old machine %s.", uNameBase[0], machs[0], oldMach)
			return false
		}
		return true
	}
	if timeout, err := util.WaitForState(checkReplacedMachines); err != nil {
		t.Fatalf("Cannot verify replaced units within %v\nerr: %v", timeout, err)
	}
}
// Load service and discovery units and test whether the discovery unit adds
// itself as a dependency of the service.
func TestInstallUnit(t *testing.T) {
	cluster, err := platform.NewNspawnCluster("smoke")
	if err != nil {
		t.Fatal(err)
	}
	defer cluster.Destroy(t)

	// Start with a two-node cluster
	members, err := platform.CreateNClusterMembers(cluster, 2)
	if err != nil {
		t.Fatal(err)
	}
	m0 := members[0]
	_, err = cluster.WaitForNMachines(m0, 2)
	if err != nil {
		t.Fatal(err)
	}

	// Load unit files
	stdout, stderr, err := cluster.Fleetctl(m0, "load", "fixtures/units/hello.service", "fixtures/units/discovery.service")
	if err != nil {
		t.Fatalf("Failed loading unit files: \nstdout: %s\nstderr: %s\nerr: %v", stdout, stderr, err)
	}

	checkState := func(match string) bool {
		stdout, _, err := cluster.Fleetctl(m0, "--strict-host-key-checking=false", "ssh", "discovery.service", "systemctl show --property=ActiveState discovery.service")
		if err != nil {
			t.Logf("Failed getting info using remote systemctl: %v", err)
		}
		stdout = strings.TrimSpace(stdout)
		return stdout == fmt.Sprintf("ActiveState=%s", match)
	}

	// Verify that the discovery.service unit is loaded but not started
	timeout, err := util.WaitForState(func() bool { return checkState("inactive") })
	if err != nil {
		t.Fatalf("discovery.service unit is not reported as inactive within %v: %v", timeout, err)
	}

	// Start the hello.service unit
	stdout, stderr, err = cluster.Fleetctl(m0, "start", "fixtures/units/hello.service")
	if err != nil {
		t.Fatalf("Failed starting unit: \nstdout: %s\nstderr: %s\nerr: %v", stdout, stderr, err)
	}

	// Verify that the discovery.service unit was started
	timeout, err = util.WaitForState(func() bool { return checkState("active") })
	if err != nil {
		t.Fatalf("discovery.service unit is not reported as active within %v:\n%v", timeout, err)
	}

	// Stop the hello.service unit
	stdout, stderr, err = cluster.Fleetctl(m0, "stop", "fixtures/units/hello.service")
	if err != nil {
		t.Fatalf("Failed stopping unit: \nstdout: %s\nstderr: %s\nerr: %v", stdout, stderr, err)
	}

	// Verify that the discovery.service unit was stopped
	timeout, err = util.WaitForState(func() bool { return checkState("inactive") })
	if err != nil {
		t.Fatalf("discovery.service unit is not reported as inactive within %v:\n%v", timeout, err)
	}
}
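// The discovery.service fixture is assumed to tie its lifecycle to
// hello.service (starting and stopping with it) and to be scheduled next to
// it. One plausible way to express that, sketched here with hypothetical
// contents not copied from fixtures/units/, is a BindsTo dependency plus
// co-scheduling via MachineOf:
//
//	[Unit]
//	BindsTo=hello.service
//	After=hello.service
//
//	[Service]
//	ExecStart=/bin/bash -c "while true; do echo discovery; sleep 1; done"
//
//	[X-Fleet]
//	MachineOf=hello.service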