// Ensure units can be scheduled directly to a given machine using the
// MachineID unit option.
func TestScheduleConditionMachineID(t *testing.T) {
	cluster, err := platform.NewNspawnCluster("smoke")
	if err != nil {
		t.Fatal(err)
	}
	defer cluster.Destroy(t)

	// Start with a simple three-node cluster
	members, err := platform.CreateNClusterMembers(cluster, 3)
	if err != nil {
		t.Fatal(err)
	}
	m0 := members[0]
	machines, err := cluster.WaitForNMachines(m0, 3)
	if err != nil {
		t.Fatal(err)
	}

	// Start 3 units that are each scheduled to one of our machines
	schedule := make(map[string]string)
	for _, machine := range machines {
		contents := `
[Service]
ExecStart=/bin/bash -c "while true; do echo Hello, World!; sleep 1; done"

[X-Fleet]
MachineID=%s
`
		unitFile, err := util.TempUnit(fmt.Sprintf(contents, machine))
		if err != nil {
			t.Fatalf("Failed creating temporary unit: %v", err)
		}
		defer os.Remove(unitFile)

		stdout, stderr, err := cluster.Fleetctl(m0, "start", unitFile)
		if err != nil {
			t.Fatalf("Failed starting unit file %s: \nstdout: %s\nstderr: %s\nerr: %v", unitFile, stdout, stderr, err)
		}

		unit := filepath.Base(unitFile)
		schedule[unit] = machine
	}

	// Block until our three units have been started
	active, err := cluster.WaitForNActiveUnits(m0, 3)
	if err != nil {
		t.Fatal(err)
	}
	states, err := util.ActiveToSingleStates(active)
	if err != nil {
		t.Fatal(err)
	}

	for unit, unitState := range states {
		if unitState.Machine != schedule[unit] {
			t.Errorf("Unit %s was scheduled to %s, expected %s", unit, unitState.Machine, schedule[unit])
		}
	}
}
// Start three pairs of services, asserting each pair lands on the same
// machine due to the MachineOf options in the unit files.
func TestScheduleMachineOf(t *testing.T) {
	cluster, err := platform.NewNspawnCluster("smoke")
	if err != nil {
		t.Fatal(err)
	}
	defer cluster.Destroy(t)

	// Start with a simple three-node cluster
	members, err := platform.CreateNClusterMembers(cluster, 3)
	if err != nil {
		t.Fatal(err)
	}
	m0 := members[0]
	machines, err := cluster.WaitForNMachines(m0, 3)
	if err != nil {
		t.Fatal(err)
	}

	// Ensure we can SSH into each machine using fleetctl
	for _, machine := range machines {
		if stdout, stderr, err := cluster.Fleetctl(m0, "--strict-host-key-checking=false", "ssh", machine, "uptime"); err != nil {
			t.Errorf("Unable to SSH into fleet machine: \nstdout: %s\nstderr: %s\nerr: %v", stdout, stderr, err)
		}
	}

	// Start the 3 pairs of services
	for i := 0; i < 3; i++ {
		ping := fmt.Sprintf("fixtures/units/ping.%d.service", i)
		pong := fmt.Sprintf("fixtures/units/pong.%d.service", i)
		stdout, stderr, err := cluster.Fleetctl(m0, "start", "--no-block", ping, pong)
		if err != nil {
			t.Errorf("Failed starting units: \nstdout: %s\nstderr: %s\nerr: %v", stdout, stderr, err)
		}
	}

	// All 6 services should be visible immediately and become ACTIVE
	// shortly thereafter
	stdout, _, err := cluster.Fleetctl(m0, "list-unit-files", "--no-legend")
	if err != nil {
		t.Fatalf("Failed to run list-unit-files: %v", err)
	}
	units := strings.Split(strings.TrimSpace(stdout), "\n")
	if len(units) != 6 {
		t.Fatalf("Did not find six units in cluster: \n%s", stdout)
	}
	active, err := cluster.WaitForNActiveUnits(m0, 6)
	if err != nil {
		t.Fatal(err)
	}
	states, err := util.ActiveToSingleStates(active)
	if err != nil {
		t.Fatal(err)
	}

	for i := 0; i < 3; i++ {
		ping := fmt.Sprintf("ping.%d.service", i)
		pingState, ok := states[ping]
		if !ok {
			t.Errorf("Failed to find state for %s", ping)
			continue
		}

		pong := fmt.Sprintf("pong.%d.service", i)
		pongState, ok := states[pong]
		if !ok {
			t.Errorf("Failed to find state for %s", pong)
			continue
		}

		if len(pingState.Machine) == 0 {
			t.Errorf("Unit %s is not reporting machine", ping)
		}

		if len(pongState.Machine) == 0 {
			t.Errorf("Unit %s is not reporting machine", pong)
		}

		if pingState.Machine != pongState.Machine {
			t.Errorf("Units %s and %s are not on same machine", ping, pong)
		}
	}

	// Ensure a pair of units migrates together when their host goes down
	mach := states["ping.1.service"].Machine
	if _, _, err = cluster.Fleetctl(m0, "--strict-host-key-checking=false", "ssh", mach, "sudo", "systemctl", "stop", "fleet"); err != nil {
		t.Fatal(err)
	}

	var mN platform.Member
	if m0.ID() == states["ping.1.service"].Machine {
		mN = members[1]
	} else {
		mN = m0
	}

	if _, err := cluster.WaitForNMachines(mN, 2); err != nil {
		t.Fatal(err)
	}
	active, err = cluster.WaitForNActiveUnits(mN, 6)
	if err != nil {
		t.Fatal(err)
	}
	states, err = util.ActiveToSingleStates(active)
	if err != nil {
		t.Fatal(err)
	}

	newPingMach := states["ping.1.service"].Machine
	if mach == newPingMach {
		t.Fatalf("Unit ping.1.service did not appear to migrate")
	}

	newPongMach := states["pong.1.service"].Machine
	if newPingMach != newPongMach {
		t.Errorf("Unit pong.1.service did not migrate with ping.1.service")
	}
}
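// For reference, the ping/pong fixtures pair units via fleet's MachineOf
// option. The following is an illustrative sketch only; the actual contents of
// fixtures/units/ping.N.service and pong.N.service are not shown in this file,
// so the exact directives are an assumption.
//
//	ping.0.service:
//	  [Service]
//	  ExecStart=/bin/bash -c "while true; do echo ping; sleep 1; done"
//
//	pong.0.service:
//	  [Service]
//	  ExecStart=/bin/bash -c "while true; do echo pong; sleep 1; done"
//
//	  [X-Fleet]
//	  MachineOf=ping.0.service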
// TestScheduleReplace starts 1 unit, followed by starting another unit
// that replaces the 1st unit. Then it verifies that the 2 units are
// started on different machines.
func TestScheduleReplace(t *testing.T) {
	cluster, err := platform.NewNspawnCluster("smoke")
	if err != nil {
		t.Fatal(err)
	}
	defer cluster.Destroy(t)

	// Start with a simple two-node cluster
	members, err := platform.CreateNClusterMembers(cluster, 2)
	if err != nil {
		t.Fatal(err)
	}
	m0 := members[0]
	if _, err := cluster.WaitForNMachines(m0, 2); err != nil {
		t.Fatal(err)
	}

	// Start a unit without Replaces
	uNames := []string{
		"fixtures/units/replace.0.service",
		"fixtures/units/replace.1.service",
	}
	if stdout, stderr, err := cluster.Fleetctl(m0, "start", "--no-block", uNames[0]); err != nil {
		t.Fatalf("Failed starting unit %s: \nstdout: %s\nstderr: %s\nerr: %v", uNames[0], stdout, stderr, err)
	}

	active, err := cluster.WaitForNActiveUnits(m0, 1)
	if err != nil {
		t.Fatal(err)
	}
	_, err = util.ActiveToSingleStates(active)
	if err != nil {
		t.Fatal(err)
	}

	// Start a unit that replaces the former one, replace.0.service
	if stdout, stderr, err := cluster.Fleetctl(m0, "start", "--no-block", uNames[1]); err != nil {
		t.Fatalf("Failed starting unit %s: \nstdout: %s\nstderr: %s\nerr: %v", uNames[1], stdout, stderr, err)
	}

	// Check that both units show up
	stdout, _, err := cluster.Fleetctl(m0, "list-unit-files", "--no-legend")
	if err != nil {
		t.Fatalf("Failed to run list-unit-files: %v", err)
	}
	units := strings.Split(strings.TrimSpace(stdout), "\n")
	if len(units) != 2 {
		t.Fatalf("Did not find two units in cluster: \n%s", stdout)
	}
	active, err = cluster.WaitForNActiveUnits(m0, 2)
	if err != nil {
		t.Fatal(err)
	}
	states, err := util.ActiveToSingleStates(active)
	if err != nil {
		t.Fatal(err)
	}

	// Check that unit 1 is located on a different machine from that of unit 0
	nUnits := 2
	uNameBase := make([]string, nUnits)
	machs := make([]string, nUnits)
	for i, uName := range uNames {
		uNameBase[i] = path.Base(uName)
		machs[i] = states[uNameBase[i]].Machine
	}
	if machs[0] == machs[1] {
		t.Fatalf("machine for %s is %s, the same as that of %s.", uNameBase[0], machs[0], uNameBase[1])
	}

	// Check that circular replaces end up with 1 launched unit.
	// First of all, destroy the existing unit replace.0.service.
	if stdout, stderr, err := cluster.Fleetctl(m0, "destroy", uNameBase[0]); err != nil {
		t.Fatalf("Failed to destroy unit %s: \nstdout: %s\nstderr: %s\nerr: %v", uNameBase[0], stdout, stderr, err)
	}

	// Generate a new service 0 derived from a fixture, make the new service
	// replace service 1, and store it under /tmp.
	uName0tmp := path.Join("/tmp", uNameBase[0])
	err = util.GenNewFleetService(uName0tmp, uNames[1], "Replaces=replace.1.service", "Replaces=replace.0.service")
	if err != nil {
		t.Fatalf("Failed to generate a temp fleet service: %v", err)
	}

	// Start the replace.0 unit that replaces replace.1.service; afterwards
	// fleetctl list-unit-files should report only 1 launched unit.
	// Note that we still need to run list-units once, before doing
	// list-unit-files, for reliable tests.
	stdout, stderr, err := cluster.Fleetctl(m0, "start", "--no-block", uName0tmp)
	if err != nil {
		t.Fatalf("Failed starting unit %s: \nstdout: %s\nstderr: %s\nerr: %v", uName0tmp, stdout, stderr, err)
	}
	stdout, _, err = cluster.Fleetctl(m0, "list-unit-files", "--no-legend")
	if err != nil {
		t.Fatalf("Failed to run list-unit-files: %v", err)
	}
	units = strings.Split(strings.TrimSpace(stdout), "\n")
	if len(units) != nUnits {
		t.Fatalf("Did not find %d units in cluster: \n%s", nUnits, stdout)
	}
	_, err = cluster.WaitForNActiveUnits(m0, nUnits)
	if err != nil {
		t.Fatal(err)
	}
	ufs, err := cluster.WaitForNUnitFiles(m0, nUnits)
	if err != nil {
		t.Fatalf("Failed to run list-unit-files: %v", err)
	}

	uStates := make([][]util.UnitFileState, nUnits)
	var found bool
	for i, unb := range uNameBase {
		uStates[i], found = ufs[unb]
		if len(ufs) != nUnits || !found {
			t.Fatalf("Did not find %d unit files as expected: got %d", nUnits, len(ufs))
		}
	}
	nLaunched := 0
	for _, us := range uStates {
		for _, state := range us {
			if strings.Contains(state.State, "launched") {
				nLaunched++
			}
		}
	}
	if nLaunched != 1 {
		t.Fatalf("Did not find 1 launched unit as expected: got %d", nLaunched)
	}

	os.Remove(uName0tmp)
}
// Start 5 services that conflict with one another. Assert that only
// 3 of the 5 are started.
func TestScheduleConflicts(t *testing.T) {
	cluster, err := platform.NewNspawnCluster("smoke")
	if err != nil {
		t.Fatal(err)
	}
	defer cluster.Destroy(t)

	// Start with a simple three-node cluster
	members, err := platform.CreateNClusterMembers(cluster, 3)
	if err != nil {
		t.Fatal(err)
	}
	m0 := members[0]
	machines, err := cluster.WaitForNMachines(m0, 3)
	if err != nil {
		t.Fatal(err)
	}

	// Ensure we can SSH into each machine using fleetctl
	for _, machine := range machines {
		if stdout, stderr, err := cluster.Fleetctl(m0, "--strict-host-key-checking=false", "ssh", machine, "uptime"); err != nil {
			t.Errorf("Unable to SSH into fleet machine: \nstdout: %s\nstderr: %s\nerr: %v", stdout, stderr, err)
		}
	}

	for i := 0; i < 5; i++ {
		unit := fmt.Sprintf("fixtures/units/conflict.%d.service", i)
		stdout, stderr, err := cluster.Fleetctl(m0, "start", "--no-block", unit)
		if err != nil {
			t.Errorf("Failed starting unit %s: \nstdout: %s\nstderr: %s\nerr: %v", unit, stdout, stderr, err)
		}
	}

	// All 5 services should be visible immediately and 3 should become
	// ACTIVE shortly thereafter
	stdout, _, err := cluster.Fleetctl(m0, "list-unit-files", "--no-legend")
	if err != nil {
		t.Fatalf("Failed to run list-unit-files: %v", err)
	}
	units := strings.Split(strings.TrimSpace(stdout), "\n")
	if len(units) != 5 {
		t.Fatalf("Did not find five units in cluster: \n%s", stdout)
	}
	active, err := cluster.WaitForNActiveUnits(m0, 3)
	if err != nil {
		t.Fatal(err)
	}
	states, err := util.ActiveToSingleStates(active)
	if err != nil {
		t.Fatal(err)
	}

	machineSet := make(map[string]bool)

	for unit, unitState := range states {
		if len(unitState.Machine) == 0 {
			t.Errorf("Unit %s is not reporting machine", unit)
		}
		machineSet[unitState.Machine] = true
	}

	if len(machineSet) != 3 {
		t.Errorf("3 active units not running on 3 unique machines")
	}
}
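// For reference, the conflict.N.service fixtures rely on fleet's Conflicts
// option to keep any two of them off the same machine. This is an illustrative
// sketch only; the actual fixture contents are not shown in this file and the
// exact directives are an assumption.
//
//	[Service]
//	ExecStart=/bin/bash -c "while true; do echo conflict; sleep 1; done"
//
//	[X-Fleet]
//	Conflicts=conflict.*.service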
func TestScheduleOneWayConflict(t *testing.T) {
	cluster, err := platform.NewNspawnCluster("smoke")
	if err != nil {
		t.Fatal(err)
	}
	defer cluster.Destroy(t)

	// Start with a single-node cluster
	members, err := platform.CreateNClusterMembers(cluster, 1)
	if err != nil {
		t.Fatal(err)
	}
	m0 := members[0]
	if _, err := cluster.WaitForNMachines(m0, 1); err != nil {
		t.Fatal(err)
	}

	// Start a unit that conflicts with a yet-to-be-scheduled unit
	name := "fixtures/units/conflicts-with-hello.service"
	if stdout, stderr, err := cluster.Fleetctl(m0, "start", "--no-block", name); err != nil {
		t.Fatalf("Failed starting unit %s: \nstdout: %s\nstderr: %s\nerr: %v", name, stdout, stderr, err)
	}

	active, err := cluster.WaitForNActiveUnits(m0, 1)
	if err != nil {
		t.Fatal(err)
	}
	states, err := util.ActiveToSingleStates(active)
	if err != nil {
		t.Fatal(err)
	}

	// Start a unit that does not define any conflicts
	name = "fixtures/units/hello.service"
	if stdout, stderr, err := cluster.Fleetctl(m0, "start", "--no-block", name); err != nil {
		t.Fatalf("Failed starting unit %s: \nstdout: %s\nstderr: %s\nerr: %v", name, stdout, stderr, err)
	}

	// Both units should show up, but only conflicts-with-hello.service
	// should report ACTIVE
	stdout, _, err := cluster.Fleetctl(m0, "list-unit-files", "--no-legend")
	if err != nil {
		t.Fatalf("Failed to run list-unit-files: %v", err)
	}
	units := strings.Split(strings.TrimSpace(stdout), "\n")
	if len(units) != 2 {
		t.Fatalf("Did not find two units in cluster: \n%s", stdout)
	}
	active, err = cluster.WaitForNActiveUnits(m0, 1)
	if err != nil {
		t.Fatal(err)
	}
	states, err = util.ActiveToSingleStates(active)
	if err != nil {
		t.Fatal(err)
	}
	for unit := range states {
		if unit != "conflicts-with-hello.service" {
			t.Error("Incorrect unit started:", unit)
		}
	}

	// Destroying the conflicting unit should allow the other to start
	name = "conflicts-with-hello.service"
	if _, _, err := cluster.Fleetctl(m0, "destroy", name); err != nil {
		t.Fatalf("Failed destroying %s", name)
	}

	// NOTE: we need to sleep here shortly to avoid occasional errors of
	// conflicts-with-hello.service being rescheduled even after being destroyed.
	// In that case, the conflicts unit remains active, while the original
	// hello.service remains inactive. Then the test TestScheduleOneWayConflict
	// fails at the end with a message "Incorrect unit started".
	// This error seems to occur frequently when enable_grpc is turned on.
	// - dpark 20160615
	time.Sleep(1 * time.Second)

	// Wait for the destroyed unit to actually disappear
	timeout, err := util.WaitForState(
		func() bool {
			stdout, _, err := cluster.Fleetctl(m0, "list-units", "--no-legend", "--full", "--fields", "unit,active,machine")
			if err != nil {
				return false
			}
			lines := strings.Split(strings.TrimSpace(stdout), "\n")
			states := util.ParseUnitStates(lines)
			for _, state := range states {
				if state.Name == name {
					return false
				}
			}
			return true
		},
	)
	if err != nil {
		t.Fatalf("Destroyed unit %s not gone within %v", name, timeout)
	}

	active, err = cluster.WaitForNActiveUnits(m0, 1)
	if err != nil {
		t.Fatal(err)
	}
	states, err = util.ActiveToSingleStates(active)
	if err != nil {
		t.Fatal(err)
	}
	for unit := range states {
		if unit != "hello.service" {
			t.Error("Incorrect unit started:", unit)
		}
	}
}
// Simulate rebooting a single member of a fleet cluster
func TestDynamicClusterMemberReboot(t *testing.T) {
	cluster, err := platform.NewNspawnCluster("smoke")
	if err != nil {
		t.Fatal(err)
	}
	defer cluster.Destroy(t)

	// Start with a simple three-node cluster
	members, err := platform.CreateNClusterMembers(cluster, 3)
	if err != nil {
		t.Fatal(err)
	}
	m0 := members[0]
	if _, err = cluster.WaitForNMachines(m0, 3); err != nil {
		t.Fatal(err)
	}

	_, _, err = cluster.Fleetctl(m0, "start",
		"fixtures/units/conflict.0.service",
		"fixtures/units/conflict.1.service",
		"fixtures/units/conflict.2.service",
	)
	if err != nil {
		t.Errorf("Failed starting units: %v", err)
	}

	// All 3 services should be visible immediately, and all of them should
	// become ACTIVE shortly thereafter
	stdout, _, err := cluster.Fleetctl(m0, "list-units", "--no-legend")
	if err != nil {
		t.Fatalf("Failed to run list-units: %v", err)
	}
	units := strings.Split(strings.TrimSpace(stdout), "\n")
	if len(units) != 3 {
		t.Fatalf("Did not find 3 units in cluster: \n%s", stdout)
	}
	oldActive, err := cluster.WaitForNActiveUnits(m0, 3)
	if err != nil {
		t.Fatal(err)
	}
	oldStates, err := util.ActiveToSingleStates(oldActive)
	if err != nil {
		t.Fatal(err)
	}

	// Simulate a reboot by recreating one of the cluster members
	if _, err := cluster.ReplaceMember(cluster.Members()[1]); err != nil {
		t.Fatalf("replace failed: %v", err)
	}
	newActive, err := cluster.WaitForNActiveUnits(m0, 3)
	if err != nil {
		t.Fatal(err)
	}
	newStates, err := util.ActiveToSingleStates(newActive)
	if err != nil {
		t.Fatal(err)
	}

	migrated := 0
	for _, unit := range []string{"conflict.0.service", "conflict.1.service", "conflict.2.service"} {
		if oldStates[unit].Machine != newStates[unit].Machine {
			migrated++
		}
	}

	if migrated != 1 {
		t.Errorf("Expected 1 unit to migrate, but found %d", migrated)
		t.Logf("Initial state: %#v", oldStates)
		t.Logf("Post-reboot state: %#v", newStates)
	}
}
// Ensure an existing unit migrates to an unoccupied machine
// if its host goes down.
func TestDynamicClusterNewMemberUnitMigration(t *testing.T) {
	cluster, err := platform.NewNspawnCluster("smoke")
	if err != nil {
		t.Fatal(err)
	}
	defer cluster.Destroy(t)

	// Start with a 4-node cluster
	members, err := platform.CreateNClusterMembers(cluster, 4)
	if err != nil {
		t.Fatal(err)
	}
	m0 := members[0]
	if _, err = cluster.WaitForNMachines(m0, 4); err != nil {
		t.Fatal(err)
	}

	// Start 3 conflicting units on the 4-node cluster
	_, _, err = cluster.Fleetctl(m0, "start",
		"fixtures/units/conflict.0.service",
		"fixtures/units/conflict.1.service",
		"fixtures/units/conflict.2.service",
	)
	if err != nil {
		t.Errorf("Failed starting units: %v", err)
	}

	// All 3 services should be visible immediately, and all of them should
	// become ACTIVE shortly thereafter
	stdout, _, err := cluster.Fleetctl(m0, "list-units", "--no-legend")
	if err != nil {
		t.Fatalf("Failed to run list-units: %v", err)
	}
	units := strings.Split(strings.TrimSpace(stdout), "\n")
	if len(units) != 3 {
		t.Fatalf("Did not find 3 units in cluster: \n%s", stdout)
	}
	active, err := cluster.WaitForNActiveUnits(m0, 3)
	if err != nil {
		t.Fatal(err)
	}

	// Ensure each unit is only running on a single machine
	states, err := util.ActiveToSingleStates(active)
	if err != nil {
		t.Fatal(err)
	}

	// Kill one of the machines and make sure the unit migrates somewhere else
	unit := "conflict.1.service"
	oldMach := states[unit].Machine
	oldIP := states[unit].IP
	if _, _, err = cluster.Fleetctl(m0, "--strict-host-key-checking=false", "ssh", oldMach, "sudo", "systemctl", "stop", "fleet"); err != nil {
		t.Fatal(err)
	}

	var mN platform.Member
	if m0.IP() == oldIP {
		mN = members[1]
	} else {
		mN = m0
	}

	if _, err = cluster.WaitForNMachines(mN, 3); err != nil {
		t.Fatal(err)
	}
	newActive, err := cluster.WaitForNActiveUnits(mN, 3)
	if err != nil {
		t.Fatal(err)
	}

	// Ensure each unit is only running on a single machine
	newStates, err := util.ActiveToSingleStates(newActive)
	if err != nil {
		t.Fatal(err)
	}

	newMach := newStates[unit].Machine
	if newMach == oldMach {
		t.Fatalf("Unit %s did not migrate off of machine %s", unit, oldMach)
	}

	// Ensure no other units migrated due to this churn
	if newMach == states["conflict.0.service"].Machine || newMach == states["conflict.2.service"].Machine {
		t.Errorf("Unit %s landed on occupied machine", unit)
	}

	if states["conflict.0.service"].Machine != newStates["conflict.0.service"].Machine ||
		states["conflict.2.service"].Machine != newStates["conflict.2.service"].Machine {
		t.Errorf("Unit caused unnecessary churn in the cluster")
	}
}
// TestScheduleCircularReplace starts 2 units that try to replace each other.
// Thus it's expected that only one of the units becomes active.
func TestScheduleCircularReplace(t *testing.T) {
	cluster, err := platform.NewNspawnCluster("smoke")
	if err != nil {
		t.Fatal(err)
	}
	defer cluster.Destroy(t)

	members, err := platform.CreateNClusterMembers(cluster, 2)
	if err != nil {
		t.Fatal(err)
	}
	m0 := members[0]
	if _, err := cluster.WaitForNMachines(m0, 2); err != nil {
		t.Fatal(err)
	}

	// Check that circular replaces end up with 1 launched unit.
	// To do that, generate a new service 0 that replaces service 1, and store
	// it under /tmp. Also use the original service 1 that replaces 0.
	uNames := []string{
		"fixtures/units/replace.0.service",
		"fixtures/units/replace.1.service",
	}
	nUnits := 2
	nActiveUnits := 1
	uNameBase := make([]string, nUnits)
	for i, uName := range uNames {
		uNameBase[i] = path.Base(uName)
	}
	uName0tmp := path.Join("/tmp", uNameBase[0])
	err = util.GenNewFleetService(uName0tmp, uNames[1], "Replaces=replace.1.service", "Replaces=replace.0.service")
	if err != nil {
		t.Fatalf("Failed to generate a temp fleet service: %v", err)
	}

	// Start the replace.0 unit that replaces replace.1.service; afterwards
	// fleetctl list-unit-files should report only 1 launched unit.
	stdout, stderr, err := cluster.Fleetctl(m0, "start", "--no-block", uName0tmp)
	if err != nil {
		t.Fatalf("Failed starting unit %s: \nstdout: %s\nstderr: %s\nerr: %v", uName0tmp, stdout, stderr, err)
	}
	stdout, stderr, err = cluster.Fleetctl(m0, "list-unit-files", "--no-legend")
	if err != nil {
		t.Fatalf("Failed to run list-unit-files:\nstdout: %s\nstderr: %s\nerr: %v", stdout, stderr, err)
	}
	units := strings.Split(strings.TrimSpace(stdout), "\n")
	if len(units) != nActiveUnits {
		t.Fatalf("Did not find %d unit(s) in cluster: \n%s", nActiveUnits, stdout)
	}
	_, err = cluster.WaitForNActiveUnits(m0, nActiveUnits)
	if err != nil {
		t.Fatal(err)
	}
	ufs, err := cluster.WaitForNUnitFiles(m0, nActiveUnits)
	if err != nil {
		t.Fatalf("Failed to run list-unit-files: %v", err)
	}

	// Start the replace.1 unit that replaces replace.0.service,
	// and then check that only 1 unit is active
	if stdout, stderr, err := cluster.Fleetctl(m0, "start", "--no-block", uNames[1]); err != nil {
		t.Fatalf("Failed starting unit %s: \nstdout: %s\nstderr: %s\nerr: %v", uNames[1], stdout, stderr, err)
	}
	stdout, stderr, err = cluster.Fleetctl(m0, "list-unit-files", "--no-legend")
	if err != nil {
		t.Fatalf("Failed to run list-unit-files:\nstdout: %s\nstderr: %s\nerr: %v", stdout, stderr, err)
	}
	units = strings.Split(strings.TrimSpace(stdout), "\n")
	if len(units) != nUnits {
		t.Fatalf("Did not find %d units in cluster: \n%s", nUnits, stdout)
	}

	active, err := cluster.WaitForNActiveUnits(m0, nActiveUnits)
	if err != nil {
		t.Fatal(err)
	}
	_, err = util.ActiveToSingleStates(active)
	if err != nil {
		t.Fatal(err)
	}

	uStates := make([][]util.UnitFileState, nUnits)
	for i, unb := range uNameBase {
		uStates[i], _ = ufs[unb]
	}
	nLaunched := 0
	for _, us := range uStates {
		for _, state := range us {
			if strings.Contains(state.State, "launched") {
				nLaunched++
			}
		}
	}
	if nLaunched != nActiveUnits {
		t.Fatalf("Did not find %d launched unit(s) as expected: got %d", nActiveUnits, nLaunched)
	}

	os.Remove(uName0tmp)
}
// TestScheduleReplace starts 3 units, followed by starting another unit
// that replaces the 1st unit. Then it verifies that the original unit
// got rescheduled on a different machine.
func TestScheduleReplace(t *testing.T) {
	cluster, err := platform.NewNspawnCluster("smoke")
	if err != nil {
		t.Fatal(err)
	}
	defer cluster.Destroy(t)

	members, err := platform.CreateNClusterMembers(cluster, 2)
	if err != nil {
		t.Fatal(err)
	}
	m0 := members[0]
	m1 := members[1]
	if _, err := cluster.WaitForNMachines(m0, 2); err != nil {
		t.Fatal(err)
	}

	// Start 3 units without Replaces: replace.0.service on m0, and both
	// replace.1 and replace.2 on m1. That's possible because replace.2.service
	// has the option "MachineOf=replace.1.service".
	uNames := []string{
		"fixtures/units/replace.0.service",
		"fixtures/units/replace.1.service",
		"fixtures/units/replace.2.service",
		"fixtures/units/replace-kick0.service",
	}
	if stdout, stderr, err := cluster.Fleetctl(m0, "start", "--no-block", uNames[0]); err != nil {
		t.Fatalf("Failed starting unit %s: \nstdout: %s\nstderr: %s\nerr: %v", uNames[0], stdout, stderr, err)
	}
	if stdout, stderr, err := cluster.Fleetctl(m1, "start", "--no-block", uNames[1]); err != nil {
		t.Fatalf("Failed starting unit %s: \nstdout: %s\nstderr: %s\nerr: %v", uNames[1], stdout, stderr, err)
	}
	if stdout, stderr, err := cluster.Fleetctl(m1, "start", "--no-block", uNames[2]); err != nil {
		t.Fatalf("Failed starting unit %s: \nstdout: %s\nstderr: %s\nerr: %v", uNames[2], stdout, stderr, err)
	}

	active, err := cluster.WaitForNActiveUnits(m0, 3)
	if err != nil {
		t.Fatal(err)
	}
	states, err := util.ActiveToSingleStates(active)
	if err != nil {
		t.Fatal(err)
	}
	oldMach := states[path.Base(uNames[0])].Machine

	// Start a unit replace-kick0.service that replaces replace.0.service.
	// The kick0 unit will be scheduled to m0, as m0 is less loaded than m1.
	// So it's possible to trigger a situation where kick0 kicks the original unit 0.
	if stdout, stderr, err := cluster.Fleetctl(m0, "start", "--no-block", uNames[3]); err != nil {
		t.Fatalf("Failed starting unit %s: \nstdout: %s\nstderr: %s\nerr: %v", uNames[3], stdout, stderr, err)
	}

	// Here we need to wait up to 15 seconds, to avoid races, because the unit state
	// publisher could otherwise report unit states with old machine IDs to registry.
	checkReplacedMachines := func() bool {
		// Check that 4 units show up
		nUnits := 4
		stdout, stderr, err := cluster.Fleetctl(m0, "list-unit-files", "--no-legend")
		if err != nil {
			t.Logf("Failed to run list-unit-files:\nstdout: %s\nstderr: %s\nerr: %v", stdout, stderr, err)
			return false
		}
		units := strings.Split(strings.TrimSpace(stdout), "\n")
		if len(units) != nUnits {
			t.Logf("Did not find %d units in cluster: \n%s", nUnits, stdout)
			return false
		}
		active, err = cluster.WaitForNActiveUnits(m0, nUnits)
		if err != nil {
			t.Log(err)
			return false
		}
		states, err = util.ActiveToSingleStates(active)
		if err != nil {
			t.Log(err)
			return false
		}

		// Check that replace.0.service is located on a different machine from
		// that of replace-kick0.service.
		uNameBase := make([]string, nUnits)
		machs := make([]string, nUnits)
		for i, uName := range uNames {
			uNameBase[i] = path.Base(uName)
			machs[i] = states[uNameBase[i]].Machine
		}
		if machs[0] == machs[3] {
			t.Logf("machine for %s is %s, the same as that of %s.", uNameBase[0], machs[0], uNameBase[3])
			return false
		}
		if machs[3] != oldMach {
			t.Logf("machine for %s is %s, different from old machine %s.", uNameBase[3], machs[3], oldMach)
			return false
		}
		if machs[0] == oldMach {
			t.Logf("machine for %s is %s, the same as the old machine %s.", uNameBase[0], machs[0], oldMach)
			return false
		}

		return true
	}
	if timeout, err := util.WaitForState(checkReplacedMachines); err != nil {
		t.Fatalf("Cannot verify replaced units within %v\nerr: %v", timeout, err)
	}
}
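// For reference, replace-kick0.service is expected to carry a Replaces option
// pointing at replace.0.service, which is what forces the original unit to be
// rescheduled elsewhere. This is an illustrative sketch only; the actual
// fixture contents are not shown in this file and the exact directives are an
// assumption.
//
//	[Service]
//	ExecStart=/bin/bash -c "while true; do echo kick0; sleep 1; done"
//
//	[X-Fleet]
//	Replaces=replace.0.service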