Exemple #1
0
// Start 5 services that conflict with one another. Assert that only
// 3 of the 5 are started.
func TestScheduleGlobalConflicts(t *testing.T) {
	cluster, err := platform.NewNspawnCluster("smoke")
	if err != nil {
		t.Fatal(err)
	}
	defer cluster.Destroy()

	// Start with a simple three-node cluster
	if err := platform.CreateNClusterMembers(cluster, 3, platform.MachineConfig{}); err != nil {
		t.Fatal(err)
	}
	machines, err := cluster.WaitForNMachines(3)
	if err != nil {
		t.Fatal(err)
	}

	// Ensure we can SSH into each machine using fleetctl
	for _, machine := range machines {
		if _, _, err := cluster.Fleetctl("--strict-host-key-checking=false", "ssh", machine, "uptime"); err != nil {
			t.Errorf("Unable to SSH into fleet machine: %v", err)
		}
	}

	for i := 0; i < 5; i++ {
		unit := fmt.Sprintf("fixtures/units/conflict.%d.service", i)
		_, _, err := cluster.Fleetctl("start", "--no-block", unit)
		if err != nil {
			t.Errorf("Failed starting unit %s: %v", unit, err)
		}
	}

	// All 5 services should be visible immediately and 3 should become
	// ACTIVE shortly thereafter
	stdout, _, err := cluster.Fleetctl("list-units", "--no-legend")
	if err != nil {
		t.Fatalf("Failed to run list-units: %v", err)
	}
	units := strings.Split(strings.TrimSpace(stdout), "\n")
	if len(units) != 5 {
		t.Fatalf("Did not find five units in cluster: \n%s", stdout)
	}
	states, err := cluster.WaitForNActiveUnits(3)
	if err != nil {
		t.Fatal(err)
	}

	machineSet := make(map[string]bool)

	for unit, unitState := range states {
		if len(unitState.Machine) == 0 {
			t.Errorf("Unit %s is not reporting machine", unit)
		}

		machineSet[unitState.Machine] = true
	}

	if len(machineSet) != 3 {
		t.Errorf("3 active units not running on 3 unique machines")
	}
}
// TestUnitRunnable is the simplest test possible, deplying a single-node
// cluster and ensuring a unit can enter an 'active' state
func TestUnitRunnable(t *testing.T) {
	cluster, err := platform.NewNspawnCluster("smoke")
	if err != nil {
		t.Fatal(err)
	}
	defer cluster.Destroy()

	if err := platform.CreateNClusterMembers(cluster, 1, platform.MachineConfig{}); err != nil {
		t.Fatal(err)
	}
	_, err = cluster.WaitForNMachines(1)
	if err != nil {
		t.Fatal(err)
	}

	if stdout, stderr, err := cluster.Fleetctl("start", "fixtures/units/hello.service"); err != nil {
		t.Fatalf("Unable to start fleet unit: \nstdout: %s\nstderr: %s\nerr: %v", stdout, stderr, err)
	}

	units, err := cluster.WaitForNActiveUnits(1)
	if err != nil {
		t.Fatal(err)
	}
	_, found := units["hello.service"]
	if len(units) != 1 || !found {
		t.Fatalf("Expected hello.service to be sole active unit, got %v", units)
	}
}
Exemple #3
0
func TestScheduleGlobalUnits(t *testing.T) {
	// Create a three-member cluster
	cluster, err := platform.NewNspawnCluster("smoke")
	if err != nil {
		t.Fatal(err)
	}
	defer cluster.Destroy(t)
	members, err := platform.CreateNClusterMembers(cluster, 3)
	if err != nil {
		t.Fatal(err)
	}
	m0 := members[0]
	machines, err := cluster.WaitForNMachines(m0, 3)
	if err != nil {
		t.Fatal(err)
	}

	// Launch a couple of simple units
	stdout, stderr, err := cluster.Fleetctl(m0, "start", "--no-block", "fixtures/units/hello.service", "fixtures/units/goodbye.service")
	if err != nil {
		t.Fatalf("Failed starting units: \nstdout: %s\nstderr: %s\nerr: %v", stdout, stderr, err)
	}

	// Both units should show up active
	_, err = cluster.WaitForNActiveUnits(m0, 2)
	if err != nil {
		t.Fatal(err)
	}

	// Now add a global unit
	stdout, stderr, err = cluster.Fleetctl(m0, "start", "--no-block", "fixtures/units/global.service")
	if err != nil {
		t.Fatalf("Failed starting unit: \nstdout: %s\nstderr: %s\nerr: %v", stdout, stderr, err)
	}

	// Should see 2 + 3 units
	states, err := cluster.WaitForNActiveUnits(m0, 5)
	if err != nil {
		t.Fatal(err)
	}

	// Each machine should have a single global unit
	us := states["global.service"]
	for _, mach := range machines {
		var found bool
		for _, state := range us {
			if state.Machine == mach {
				found = true
				break
			}
		}
		if !found {
			t.Fatalf("Did not find global unit on machine %v", mach)
			t.Logf("Found unit states:")
			for _, state := range states {
				t.Logf("%#v", state)
			}
		}
	}
}
Exemple #4
0
// Ensure units can be scheduled directly to a given machine using the
// MachineID unit option.
func TestScheduleConditionMachineID(t *testing.T) {
	cluster, err := platform.NewNspawnCluster("smoke")
	if err != nil {
		t.Fatal(err)
	}
	defer cluster.Destroy(t)

	// Start with a simple three-node cluster
	members, err := platform.CreateNClusterMembers(cluster, 3)
	if err != nil {
		t.Fatal(err)
	}
	m0 := members[0]
	machines, err := cluster.WaitForNMachines(m0, 3)
	if err != nil {
		t.Fatal(err)
	}

	// Start 3 units that are each scheduled to one of our machines
	schedule := make(map[string]string)
	for _, machine := range machines {
		contents := `
[Service]
ExecStart=/bin/bash -c "while true; do echo Hello, World!; sleep 1; done"

[X-Fleet]
MachineID=%s
`
		unitFile, err := util.TempUnit(fmt.Sprintf(contents, machine))
		if err != nil {
			t.Fatalf("Failed creating temporary unit: %v", err)
		}
		defer os.Remove(unitFile)

		stdout, stderr, err := cluster.Fleetctl(m0, "start", unitFile)
		if err != nil {
			t.Fatalf("Failed starting unit file %s: \nstdout: %s\nstderr: %s\nerr: %v", unitFile, stdout, stderr, err)
		}

		unit := filepath.Base(unitFile)
		schedule[unit] = machine
	}

	// Block until our three units have been started
	active, err := cluster.WaitForNActiveUnits(m0, 3)
	if err != nil {
		t.Fatal(err)
	}
	states, err := util.ActiveToSingleStates(active)
	if err != nil {
		t.Fatal(err)
	}

	for unit, unitState := range states {
		if unitState.Machine != schedule[unit] {
			t.Errorf("Unit %s was scheduled to %s, expected %s", unit, unitState.Machine, schedule[unit])
		}
	}
}
func TestUnitSubmit(t *testing.T) {
	cluster, err := platform.NewNspawnCluster("smoke")
	if err != nil {
		t.Fatal(err)
	}
	defer cluster.Destroy()

	if err := platform.CreateNClusterMembers(cluster, 1, platform.MachineConfig{}); err != nil {
		t.Fatal(err)
	}
	_, err = cluster.WaitForNMachines(1)
	if err != nil {
		t.Fatal(err)
	}

	// submit a unit and assert it shows up
	if _, _, err := cluster.Fleetctl("submit", "fixtures/units/hello.service"); err != nil {
		t.Fatalf("Unable to submit fleet unit: %v", err)
	}
	stdout, _, err := cluster.Fleetctl("list-units", "--no-legend")
	if err != nil {
		t.Fatalf("Failed to run list-units: %v", err)
	}
	units := strings.Split(strings.TrimSpace(stdout), "\n")
	if len(units) != 1 {
		t.Fatalf("Did not find 1 unit in cluster: \n%s", stdout)
	}

	// submitting the same unit should not fail
	if _, _, err = cluster.Fleetctl("submit", "fixtures/units/hello.service"); err != nil {
		t.Fatalf("Expected no failure when double-submitting unit, got this: %v", err)
	}

	// destroy the unit and ensure it disappears from the unit list
	if _, _, err := cluster.Fleetctl("destroy", "fixtures/units/hello.service"); err != nil {
		t.Fatalf("Failed to destroy unit: %v", err)
	}
	stdout, _, err = cluster.Fleetctl("list-units", "--no-legend")
	if err != nil {
		t.Fatalf("Failed to run list-units: %v", err)
	}
	if strings.TrimSpace(stdout) != "" {
		t.Fatalf("Did not find 0 units in cluster: \n%s", stdout)
	}

	// submitting the unit after destruction should succeed
	if _, _, err := cluster.Fleetctl("submit", "fixtures/units/hello.service"); err != nil {
		t.Fatalf("Unable to submit fleet unit: %v", err)
	}
	stdout, _, err = cluster.Fleetctl("list-units", "--no-legend")
	if err != nil {
		t.Fatalf("Failed to run list-units: %v", err)
	}
	units = strings.Split(strings.TrimSpace(stdout), "\n")
	if len(units) != 1 {
		t.Fatalf("Did not find 1 unit in cluster: \n%s", stdout)
	}
}
Exemple #6
0
func TestKnownHostsVerification(t *testing.T) {
	cluster, err := platform.NewNspawnCluster("smoke")
	if err != nil {
		t.Fatal(err)
	}
	defer cluster.Destroy()

	members, err := platform.CreateNClusterMembers(cluster, 2)
	if err != nil {
		t.Fatal(err)
	}
	m0 := members[0]
	machines, err := cluster.WaitForNMachines(m0, 2)
	if err != nil {
		t.Fatal(err)
	}
	machine := machines[0]

	tmp, err := ioutil.TempFile(os.TempDir(), "known-hosts")
	if err != nil {
		t.Fatal(err)
	}
	tmp.Close()
	defer syscall.Unlink(tmp.Name())

	khFile := tmp.Name()

	if stdout, stderr, err := cluster.FleetctlWithInput(m0, "yes", "--strict-host-key-checking=true", fmt.Sprintf("--known-hosts-file=%s", khFile), "ssh", machine, "uptime"); err != nil {
		t.Errorf("Unable to SSH into fleet machine: \nstdout: %s\nstderr: %s\nerr: %v", stdout, stderr, err)
	}

	_, err = cluster.ReplaceMember(members[1])
	if err != nil {
		t.Fatalf("Failed replacing machine: %v", err)
	}

	machines, err = cluster.WaitForNMachines(m0, 2)
	if err != nil {
		t.Fatal(err)
	}
	machine = machines[0]

	// SSH'ing to the cluster member should now fail with a host key mismatch
	if _, _, err := cluster.Fleetctl(m0, "--strict-host-key-checking=true", fmt.Sprintf("--known-hosts-file=%s", khFile), "ssh", machine, "uptime"); err == nil {
		t.Errorf("Expected error while SSH'ing to fleet machine")
	}

	// Overwrite the known-hosts file to simulate removing the old host key
	if err := ioutil.WriteFile(khFile, []byte{}, os.FileMode(0644)); err != nil {
		t.Fatalf("Unable to overwrite known-hosts file: %v", err)
	}

	// And SSH should work again
	if stdout, stderr, err := cluster.FleetctlWithInput(m0, "yes", "--strict-host-key-checking=true", fmt.Sprintf("--known-hosts-file=%s", khFile), "ssh", machine, "uptime"); err != nil {
		t.Errorf("Unable to SSH into fleet machine: \nstdout: %s\nstderr: %s\nerr: %v", stdout, stderr, err)
	}

}
func TestUnitSSHActions(t *testing.T) {
	cluster, err := platform.NewNspawnCluster("smoke")
	if err != nil {
		t.Fatal(err)
	}
	defer cluster.Destroy()

	if err := platform.CreateNClusterMembers(cluster, 1, platform.MachineConfig{}); err != nil {
		t.Fatal(err)
	}
	_, err = cluster.WaitForNMachines(1)
	if err != nil {
		t.Fatal(err)
	}

	if _, _, err := cluster.Fleetctl("start", "--no-block", "fixtures/units/hello.service"); err != nil {
		t.Fatalf("Unable to start fleet unit: %v", err)
	}

	units, err := cluster.WaitForNActiveUnits(1)
	if err != nil {
		t.Fatal(err)
	}

	_, found := units["hello.service"]
	if len(units) != 1 || !found {
		t.Fatalf("Expected hello.service to be sole active unit, got %v", units)
	}

	stdout, _, err := cluster.Fleetctl("--strict-host-key-checking=false", "ssh", "hello.service", "echo", "foo")
	if err != nil {
		t.Errorf("Failure occurred while calling fleetctl ssh: %v", err)
	}

	if !strings.Contains(stdout, "foo") {
		t.Errorf("Could not find expected string in command output:\n%s", stdout)
	}

	stdout, _, err = cluster.Fleetctl("--strict-host-key-checking=false", "status", "hello.service")
	if err != nil {
		t.Errorf("Failure occurred while calling fleetctl status: %v", err)
	}

	if !strings.Contains(stdout, "Active: active") {
		t.Errorf("Could not find expected string in status output:\n%s", stdout)
	}

	stdout, _, err = cluster.Fleetctl("--strict-host-key-checking=false", "journal", "hello.service")
	if err != nil {
		t.Errorf("Failure occurred while calling fleetctl journal: %v", err)
	}

	if !strings.Contains(stdout, "Hello, World!") {
		t.Errorf("Could not find expected string in journal output:\n%s", stdout)
	}
}
Exemple #8
0
func TestScheduleOneWayConflict(t *testing.T) {
	cluster, err := platform.NewNspawnCluster("smoke")
	if err != nil {
		t.Fatal(err)
	}
	defer cluster.Destroy()

	// Start with a simple three-node cluster
	if err := platform.CreateNClusterMembers(cluster, 1, platform.MachineConfig{}); err != nil {
		t.Fatal(err)
	}
	if _, err := cluster.WaitForNMachines(1); err != nil {
		t.Fatal(err)
	}

	// Start a unit that conflicts with a yet-to-be-scheduled unit
	name := "fixtures/units/conflicts-with-hello.service"
	if _, _, err := cluster.Fleetctl("start", name); err != nil {
		t.Fatalf("Failed starting unit %s: %v", name, err)
	}

	// Start a unit that has not defined conflicts
	name = "fixtures/units/hello.service"
	if _, _, err := cluster.Fleetctl("start", name); err == nil {
		t.Fatalf("Unit %s unexpectedly started", name)
	}

	// Both units should show up, but only conflicts-with-hello.service
	// should report ACTIVE
	stdout, _, err := cluster.Fleetctl("list-units", "--no-legend")
	if err != nil {
		t.Fatalf("Failed to run list-units: %v", err)
	}
	units := strings.Split(strings.TrimSpace(stdout), "\n")
	if len(units) != 2 {
		t.Fatalf("Did not find two units in cluster: \n%s", stdout)
	}
	states, err := cluster.WaitForNActiveUnits(1)
	if err != nil {
		t.Fatal(err)
	}

	for unit, _ := range states {
		if unit != "conflicts-with-hello.service" {
			t.Error("Incorrect unit started:", unit)
		}
	}
}
Exemple #9
0
func TestScheduleOneWayConflict(t *testing.T) {
	cluster, err := platform.NewNspawnCluster("smoke")
	if err != nil {
		t.Fatal(err)
	}
	defer cluster.Destroy(t)

	// Start with a simple three-node cluster
	members, err := platform.CreateNClusterMembers(cluster, 1)
	if err != nil {
		t.Fatal(err)
	}
	m0 := members[0]
	if _, err := cluster.WaitForNMachines(m0, 1); err != nil {
		t.Fatal(err)
	}

	// Start a unit that conflicts with a yet-to-be-scheduled unit
	name := "fixtures/units/conflicts-with-hello.service"
	if stdout, stderr, err := cluster.Fleetctl(m0, "start", "--no-block", name); err != nil {
		t.Fatalf("Failed starting unit %s: \nstdout: %s\nstderr: %s\nerr: %v", name, stdout, stderr, err)
	}

	active, err := cluster.WaitForNActiveUnits(m0, 1)
	if err != nil {
		t.Fatal(err)
	}
	states, err := util.ActiveToSingleStates(active)
	if err != nil {
		t.Fatal(err)
	}

	// Start a unit that has not defined conflicts
	name = "fixtures/units/hello.service"
	if stdout, stderr, err := cluster.Fleetctl(m0, "start", "--no-block", name); err != nil {
		t.Fatalf("Failed starting unit %s: \nstdout: %s\nstderr: %s\nerr: %v", name, stdout, stderr, err)
	}

	// Both units should show up, but only conflicts-with-hello.service
	// should report ACTIVE
	stdout, _, err := cluster.Fleetctl(m0, "list-unit-files", "--no-legend")
	if err != nil {
		t.Fatalf("Failed to run list-unit-files: %v", err)
	}
	units := strings.Split(strings.TrimSpace(stdout), "\n")
	if len(units) != 2 {
		t.Fatalf("Did not find two units in cluster: \n%s", stdout)
	}
	active, err = cluster.WaitForNActiveUnits(m0, 1)
	if err != nil {
		t.Fatal(err)
	}
	states, err = util.ActiveToSingleStates(active)
	if err != nil {
		t.Fatal(err)
	}

	for unit := range states {
		if unit != "conflicts-with-hello.service" {
			t.Error("Incorrect unit started:", unit)
		}
	}

	// Destroying the conflicting unit should allow the other to start
	name = "conflicts-with-hello.service"
	if _, _, err := cluster.Fleetctl(m0, "destroy", name); err != nil {
		t.Fatalf("Failed destroying %s", name)
	}

	// NOTE: we need to sleep here shortly to avoid occasional errors of
	// conflicts-with-hello.service being rescheduled even after being destroyed.
	// In that case, the conflicts unit remains active, while the original
	// hello.service remains inactive. Then the test TestScheduleOneWayConflict
	// fails at the end with a message "Incorrect unit started".
	// This error seems to occur frequently when enable_grpc turned on.
	// - dpark 20160615
	time.Sleep(1 * time.Second)

	// Wait for the destroyed unit to actually disappear
	timeout, err := util.WaitForState(
		func() bool {
			stdout, _, err := cluster.Fleetctl(m0, "list-units", "--no-legend", "--full", "--fields", "unit,active,machine")
			if err != nil {
				return false
			}
			lines := strings.Split(strings.TrimSpace(stdout), "\n")
			states := util.ParseUnitStates(lines)
			for _, state := range states {
				if state.Name == name {
					return false
				}
			}
			return true
		},
	)
	if err != nil {
		t.Fatalf("Destroyed unit %s not gone within %v", name, timeout)
	}

	active, err = cluster.WaitForNActiveUnits(m0, 1)
	if err != nil {
		t.Fatal(err)
	}
	states, err = util.ActiveToSingleStates(active)
	if err != nil {
		t.Fatal(err)
	}
	for unit := range states {
		if unit != "hello.service" {
			t.Error("Incorrect unit started:", unit)
		}
	}

}
Exemple #10
0
// Start three machines and test template units based on machines Metadata
func TestTemplatesWithSpecifiersInMetadata(t *testing.T) {
	cluster, err := platform.NewNspawnCluster("smoke")
	if err != nil {
		t.Fatal(err)
	}
	defer cluster.Destroy(t)

	members, err := platform.CreateNClusterMembers(cluster, 3)
	if err != nil {
		t.Fatal(err)
	}
	m0 := members[0]
	_, err = cluster.WaitForNMachines(m0, 3)
	if err != nil {
		t.Fatal(err)
	}

	// Submit one template
	if stdout, stderr, err := cluster.Fleetctl(m0, "submit", "fixtures/units/[email protected]"); err != nil {
		t.Fatalf("Unable to submit [email protected] template: \nstdout: %s\nstderr: %s\nerr: %v", stdout, stderr, err)
	}

	// Start units based on template in backward order
	for i := len(members) - 1; i >= 0; i-- {
		if stdout, stderr, err := cluster.Fleetctl(m0, "start", fmt.Sprintf("fixtures/units/metadata@smoke%s.service", members[i].ID())); err != nil {
			t.Fatalf("Unable to start template based unit: \nstdout: %s\nstderr: %s\nerr: %v", stdout, stderr, err)
		}
	}

	_, err = cluster.WaitForNActiveUnits(m0, 3)
	if err != nil {
		t.Fatal(err)
	}

	stdout, stderr, err := cluster.Fleetctl(m0, "list-units", "--no-legend", "--full", "--fields", "unit,active,machine")
	if err != nil {
		t.Fatalf("Unable to get submitted units: \nstdout: %s\nstderr: %s\nerr: %v", stdout, stderr, err)
	}

	ndesired := 3
	stdout = strings.TrimSpace(stdout)
	lines := strings.Split(stdout, "\n")
	allStates := util.ParseUnitStates(lines)
	active := util.FilterActiveUnits(allStates)
	nactive := len(active)
	if nactive != ndesired {
		t.Fatalf("Failed to get %d active units: \nstdout: %s\nstderr: %s", ndesired, stdout, stderr)
	}

	for _, state := range active {
		re := regexp.MustCompile(`@([^.]*)`)
		desiredMachine := re.FindStringSubmatch(state.Name)
		if len(desiredMachine) < 2 {
			t.Fatalf("Cannot parse state.Name (%v): \nstdout: %s\nstderr: %s", state.Name, stdout, stderr)
		}
		currentMachine := fmt.Sprintf("smoke%s", state.Machine)
		if desiredMachine[1] != currentMachine {
			t.Fatalf("Template (%s) has been scheduled on wrong machine (%s): \nstdout: %s\nstderr: %s", state.Name, currentMachine, stdout, stderr)
		}
	}

	if stdout, stderr, err := cluster.Fleetctl(m0, "start", "--block-attempts=20", "fixtures/units/[email protected]"); err == nil {
		t.Fatalf("metadata@invalid unit should not be scheduled: \nstdout: %s\nstderr: %s", stdout, stderr)
	}
}
Exemple #11
0
// Ensure an existing unit migrates to an unoccupied machine
// if its host goes down.
func TestDynamicClusterNewMemberUnitMigration(t *testing.T) {
	cluster, err := platform.NewNspawnCluster("smoke")
	if err != nil {
		t.Fatal(err)
	}
	defer cluster.Destroy()

	// Start with a 4-node cluster
	members, err := platform.CreateNClusterMembers(cluster, 4)
	if err != nil {
		t.Fatal(err)
	}
	m0 := members[0]
	if _, err = cluster.WaitForNMachines(m0, 4); err != nil {
		t.Fatal(err)
	}

	// Start 3 conflicting units on the 4-node cluster
	_, _, err = cluster.Fleetctl(m0, "start",
		"fixtures/units/conflict.0.service",
		"fixtures/units/conflict.1.service",
		"fixtures/units/conflict.2.service",
	)
	if err != nil {
		t.Errorf("Failed starting units: %v", err)
	}

	// All 3 services should be visible immediately, and all of them should
	// become ACTIVE shortly thereafter
	stdout, _, err := cluster.Fleetctl(m0, "list-units", "--no-legend")
	if err != nil {
		t.Fatalf("Failed to run list-units: %v", err)
	}
	units := strings.Split(strings.TrimSpace(stdout), "\n")
	if len(units) != 3 {
		t.Fatalf("Did not find 3 units in cluster: \n%s", stdout)
	}
	active, err := cluster.WaitForNActiveUnits(m0, 3)
	if err != nil {
		t.Fatal(err)
	}

	// Ensure each unit is only running on a single machine
	states, err := util.ActiveToSingleStates(active)
	if err != nil {
		t.Fatal(err)
	}

	// Kill one of the machines and make sure the unit migrates somewhere else
	unit := "conflict.1.service"
	oldMach := states[unit].Machine
	oldIP := states[unit].IP
	if _, _, err = cluster.Fleetctl(m0, "--strict-host-key-checking=false", "ssh", oldMach, "sudo", "systemctl", "stop", "fleet"); err != nil {
		t.Fatal(err)
	}
	var mN platform.Member
	if m0.IP() == oldIP {
		mN = members[1]
	} else {
		mN = m0
	}

	if _, err = cluster.WaitForNMachines(mN, 3); err != nil {
		t.Fatal(err)
	}
	newActive, err := cluster.WaitForNActiveUnits(mN, 3)
	if err != nil {
		t.Fatal(err)
	}
	// Ensure each unit is only running on a single machine
	newStates, err := util.ActiveToSingleStates(newActive)
	if err != nil {
		t.Fatal(err)
	}

	newMach := newStates[unit].Machine
	if newMach == oldMach {
		t.Fatalf("Unit %s did not migrate from machine %s to %s", unit, oldMach, newMach)
	}

	// Ensure no other units migrated due to this churn
	if newMach == states["conflict.0.service"].Machine || newMach == states["conflict.2.service"].Machine {
		t.Errorf("Unit %s landed on occupied machine", unit)
	}

	if states["conflict.0.service"].Machine != newStates["conflict.0.service"].Machine || states["conflict.2.service"].Machine != newStates["conflict.2.service"].Machine {
		t.Errorf("Unit caused unnecessary churn in the cluster")
	}
}
Exemple #12
0
// Simulate rebooting a single member of a fleet cluster
func TestDynamicClusterMemberReboot(t *testing.T) {
	cluster, err := platform.NewNspawnCluster("smoke")
	if err != nil {
		t.Fatal(err)
	}
	defer cluster.Destroy()

	// Start with a simple three-node cluster
	members, err := platform.CreateNClusterMembers(cluster, 3)
	if err != nil {
		t.Fatal(err)
	}
	m0 := members[0]
	if _, err = cluster.WaitForNMachines(m0, 3); err != nil {
		t.Fatal(err)
	}

	_, _, err = cluster.Fleetctl(m0, "start",
		"fixtures/units/conflict.0.service",
		"fixtures/units/conflict.1.service",
		"fixtures/units/conflict.2.service",
	)
	if err != nil {
		t.Errorf("Failed starting units: %v", err)
	}

	// All 3 services should be visible immediately, and all of them should
	// become ACTIVE shortly thereafter
	stdout, _, err := cluster.Fleetctl(m0, "list-units", "--no-legend")
	if err != nil {
		t.Fatalf("Failed to run list-units: %v", err)
	}
	units := strings.Split(strings.TrimSpace(stdout), "\n")
	if len(units) != 3 {
		t.Fatalf("Did not find 3 units in cluster: \n%s", stdout)
	}
	oldActive, err := cluster.WaitForNActiveUnits(m0, 3)
	if err != nil {
		t.Fatal(err)
	}

	oldStates, err := util.ActiveToSingleStates(oldActive)
	if err != nil {
		t.Fatal(err)
	}

	// Simulate a reboot by recreating one of the cluster members
	if _, err := cluster.ReplaceMember(cluster.Members()[1]); err != nil {
		t.Fatalf("replace failed: %v", err)
	}
	newActive, err := cluster.WaitForNActiveUnits(m0, 3)
	if err != nil {
		t.Fatal(err)
	}
	newStates, err := util.ActiveToSingleStates(newActive)
	if err != nil {
		t.Fatal(err)
	}

	migrated := 0
	for _, unit := range []string{"conflict.0.service", "conflict.1.service", "conflict.2.service"} {
		if oldStates[unit].Machine != newStates[unit].Machine {
			migrated += 1
		}
	}

	if migrated != 1 {
		t.Errorf("Expected 1 unit to migrate, but found %d", migrated)
		t.Logf("Initial state: %#v", oldStates)
		t.Logf("Post-reboot state: %#v", newStates)
	}
}
func TestScheduleOneWayConflict(t *testing.T) {
	cluster, err := platform.NewNspawnCluster("smoke")
	if err != nil {
		t.Fatal(err)
	}
	defer cluster.Destroy()

	// Start with a simple three-node cluster
	members, err := platform.CreateNClusterMembers(cluster, 1)
	if err != nil {
		t.Fatal(err)
	}
	m0 := members[0]
	if _, err := cluster.WaitForNMachines(m0, 1); err != nil {
		t.Fatal(err)
	}

	// Start a unit that conflicts with a yet-to-be-scheduled unit
	name := "fixtures/units/conflicts-with-hello.service"
	if stdout, stderr, err := cluster.Fleetctl(m0, "start", "--no-block", name); err != nil {
		t.Fatalf("Failed starting unit %s: \nstdout: %s\nstderr: %s\nerr: %v", name, stdout, stderr, err)
	}

	active, err := cluster.WaitForNActiveUnits(m0, 1)
	if err != nil {
		t.Fatal(err)
	}
	states, err := util.ActiveToSingleStates(active)
	if err != nil {
		t.Fatal(err)
	}

	// Start a unit that has not defined conflicts
	name = "fixtures/units/hello.service"
	if stdout, stderr, err := cluster.Fleetctl(m0, "start", "--no-block", name); err != nil {
		t.Fatalf("Failed starting unit %s: \nstdout: %s\nstderr: %s\nerr: %v", name, stdout, stderr, err)
	}

	// Both units should show up, but only conflicts-with-hello.service
	// should report ACTIVE
	stdout, _, err := cluster.Fleetctl(m0, "list-unit-files", "--no-legend")
	if err != nil {
		t.Fatalf("Failed to run list-unit-files: %v", err)
	}
	units := strings.Split(strings.TrimSpace(stdout), "\n")
	if len(units) != 2 {
		t.Fatalf("Did not find two units in cluster: \n%s", stdout)
	}
	active, err = cluster.WaitForNActiveUnits(m0, 1)
	if err != nil {
		t.Fatal(err)
	}
	states, err = util.ActiveToSingleStates(active)
	if err != nil {
		t.Fatal(err)
	}

	for unit := range states {
		if unit != "conflicts-with-hello.service" {
			t.Error("Incorrect unit started:", unit)
		}
	}

	// Destroying the conflicting unit should allow the other to start
	name = "conflicts-with-hello.service"
	if _, _, err := cluster.Fleetctl(m0, "destroy", name); err != nil {
		t.Fatalf("Failed destroying %s", name)
	}
	// TODO(jonboulle): fix this race. Since we no longer immediately
	// remove unit state on unit destruction (and instead wait for
	// UnitStateGenerator/UnitStatePublisher to clean up), the old unit
	// shows up as active for quite some time.
	time.Sleep(5 * time.Second)
	stdout, _, err = cluster.Fleetctl(m0, "list-units", "--no-legend")
	if err != nil {
		t.Fatalf("Failed to run list-units: %v", err)
	}
	units = strings.Split(strings.TrimSpace(stdout), "\n")
	if len(units) != 1 {
		t.Fatalf("Did not find one unit in cluster: \n%s", stdout)
	}
	active, err = cluster.WaitForNActiveUnits(m0, 1)
	if err != nil {
		t.Fatal(err)
	}
	states, err = util.ActiveToSingleStates(active)
	if err != nil {
		t.Fatal(err)
	}
	for unit := range states {
		if unit != "hello.service" {
			t.Error("Incorrect unit started:", unit)
		}
	}

}
Exemple #14
0
// Simulate rebooting a single member of a fleet cluster
func TestDynamicClusterMemberReboot(t *testing.T) {
	cluster, err := platform.NewNspawnCluster("smoke")
	if err != nil {
		t.Fatal(err)
	}
	defer cluster.Destroy()

	// Start with a simple three-node cluster
	if err := platform.CreateNClusterMembers(cluster, 3, platform.MachineConfig{}); err != nil {
		t.Fatal(err)
	}
	if _, err = cluster.WaitForNMachines(3); err != nil {
		t.Fatal(err)
	}

	if _, _, err := cluster.Fleetctl("start",
		"fixtures/units/conflict.0.service",
		"fixtures/units/conflict.1.service",
		"fixtures/units/conflict.2.service",
	); err != nil {
		t.Errorf("Failed starting units: %v", err)
	}

	// All 3 services should be visible immediately, and all of them should
	// become ACTIVE shortly thereafter
	stdout, _, err := cluster.Fleetctl("list-units", "--no-legend")
	if err != nil {
		t.Fatalf("Failed to run list-units: %v", err)
	}
	units := strings.Split(strings.TrimSpace(stdout), "\n")
	if len(units) != 3 {
		t.Fatalf("Did not find 3 units in cluster: \n%s", stdout)
	}
	oldStates, err := cluster.WaitForNActiveUnits(3)
	if err != nil {
		t.Fatal(err)
	}

	// Simulate a reboot by recreating one of the cluster members
	member := cluster.Members()[1]
	if _, err = cluster.MemberCommand(member, "sudo", "systemctl", "stop", "fleet"); err != nil {
		t.Fatal(err)
	}
	if err = cluster.DestroyMember(member); err != nil {
		t.Fatal(err)
	}
	if _, err = cluster.WaitForNMachines(2); err != nil {
		t.Fatal(err)
	}
	if _, err = cluster.WaitForNActiveUnits(2); err != nil {
		t.Fatal(err)
	}
	if err = cluster.CreateMember(member, platform.MachineConfig{}); err != nil {
		t.Fatal(err)
	}
	if _, err = cluster.WaitForNMachines(3); err != nil {
		t.Fatal(err)
	}
	newStates, err := cluster.WaitForNActiveUnits(3)
	if err != nil {
		t.Fatal(err)
	}

	for _, unit := range []string{"conflict.0.service", "conflict.1.service", "conflict.2.service"} {
		if oldStates[unit].Machine != newStates[unit].Machine {
			t.Fatalf("Unit %s migrated unexpectedly", unit)
		}
	}
}
Exemple #15
0
// TestScheduleReplace starts 1 unit, followed by starting another unit
// that replaces the 1st unit. Then it verifies that the 2 units are
// started on different machines.
func TestScheduleReplace(t *testing.T) {
	cluster, err := platform.NewNspawnCluster("smoke")
	if err != nil {
		t.Fatal(err)
	}
	defer cluster.Destroy(t)

	members, err := platform.CreateNClusterMembers(cluster, 2)
	if err != nil {
		t.Fatal(err)
	}
	m0 := members[0]
	if _, err := cluster.WaitForNMachines(m0, 2); err != nil {
		t.Fatal(err)
	}

	// Start a unit without Replaces
	uNames := []string{
		"fixtures/units/replace.0.service",
		"fixtures/units/replace.1.service",
	}
	if stdout, stderr, err := cluster.Fleetctl(m0, "start", "--no-block", uNames[0]); err != nil {
		t.Fatalf("Failed starting unit %s: \nstdout: %s\nstderr: %s\nerr: %v", uNames[0], stdout, stderr, err)
	}

	active, err := cluster.WaitForNActiveUnits(m0, 1)
	if err != nil {
		t.Fatal(err)
	}
	_, err = util.ActiveToSingleStates(active)
	if err != nil {
		t.Fatal(err)
	}

	// Start a unit that replaces the former one, replace.0.service
	if stdout, stderr, err := cluster.Fleetctl(m0, "start", "--no-block", uNames[1]); err != nil {
		t.Fatalf("Failed starting unit %s: \nstdout: %s\nstderr: %s\nerr: %v", uNames[1], stdout, stderr, err)
	}

	// Check that both units should show up
	stdout, stderr, err := cluster.Fleetctl(m0, "list-unit-files", "--no-legend")
	if err != nil {
		t.Fatalf("Failed to run list-unit-files:\nstdout: %s\nstderr: %s\nerr: %v", stdout, stderr, err)
	}
	units := strings.Split(strings.TrimSpace(stdout), "\n")
	if len(units) != 2 {
		t.Fatalf("Did not find two units in cluster: \n%s", stdout)
	}
	active, err = cluster.WaitForNActiveUnits(m0, 2)
	if err != nil {
		t.Fatal(err)
	}
	states, err := util.ActiveToSingleStates(active)
	if err != nil {
		t.Fatal(err)
	}

	// Check that the unit 1 is located on a different machine from that of unit 0
	nUnits := 2
	uNameBase := make([]string, nUnits)
	machs := make([]string, nUnits)
	for i, uName := range uNames {
		uNameBase[i] = path.Base(uName)
		machs[i] = states[uNameBase[i]].Machine
	}
	if machs[0] == machs[1] {
		t.Fatalf("machine for %s is %s, the same as that of %s.", uNameBase[0], machs[0], uNameBase[1])
	}
}
Exemple #16
0
func TestScheduleOneWayConflict(t *testing.T) {
	cluster, err := platform.NewNspawnCluster("smoke")
	if err != nil {
		t.Fatal(err)
	}
	defer cluster.Destroy(t)

	// Start with a simple three-node cluster
	members, err := platform.CreateNClusterMembers(cluster, 1)
	if err != nil {
		t.Fatal(err)
	}
	m0 := members[0]
	if _, err := cluster.WaitForNMachines(m0, 1); err != nil {
		t.Fatal(err)
	}

	// Start a unit that conflicts with a yet-to-be-scheduled unit
	name := "fixtures/units/conflicts-with-hello.service"
	if stdout, stderr, err := cluster.Fleetctl(m0, "start", "--no-block", name); err != nil {
		t.Fatalf("Failed starting unit %s: \nstdout: %s\nstderr: %s\nerr: %v", name, stdout, stderr, err)
	}

	active, err := cluster.WaitForNActiveUnits(m0, 1)
	if err != nil {
		t.Fatal(err)
	}
	states, err := util.ActiveToSingleStates(active)
	if err != nil {
		t.Fatal(err)
	}

	// Start a unit that has not defined conflicts
	name = "fixtures/units/hello.service"
	if stdout, stderr, err := cluster.Fleetctl(m0, "start", "--no-block", name); err != nil {
		t.Fatalf("Failed starting unit %s: \nstdout: %s\nstderr: %s\nerr: %v", name, stdout, stderr, err)
	}

	// Both units should show up, but only conflicts-with-hello.service
	// should report ACTIVE
	stdout, _, err := cluster.Fleetctl(m0, "list-unit-files", "--no-legend")
	if err != nil {
		t.Fatalf("Failed to run list-unit-files: %v", err)
	}
	units := strings.Split(strings.TrimSpace(stdout), "\n")
	if len(units) != 2 {
		t.Fatalf("Did not find two units in cluster: \n%s", stdout)
	}
	active, err = cluster.WaitForNActiveUnits(m0, 1)
	if err != nil {
		t.Fatal(err)
	}
	states, err = util.ActiveToSingleStates(active)
	if err != nil {
		t.Fatal(err)
	}

	for unit := range states {
		if unit != "conflicts-with-hello.service" {
			t.Error("Incorrect unit started:", unit)
		}
	}

	// Destroying the conflicting unit should allow the other to start
	name = "conflicts-with-hello.service"
	if _, _, err := cluster.Fleetctl(m0, "destroy", name); err != nil {
		t.Fatalf("Failed destroying %s", name)
	}

	// Wait for the destroyed unit to actually disappear
	timeout, err := util.WaitForState(
		func() bool {
			stdout, _, err := cluster.Fleetctl(m0, "list-units", "--no-legend", "--full", "--fields", "unit,active,machine")
			if err != nil {
				return false
			}
			lines := strings.Split(strings.TrimSpace(stdout), "\n")
			states := util.ParseUnitStates(lines)
			for _, state := range states {
				if state.Name == name {
					return false
				}
			}
			return true
		},
	)
	if err != nil {
		t.Fatalf("Destroyed unit %s not gone within %v", name, timeout)
	}

	active, err = cluster.WaitForNActiveUnits(m0, 1)
	if err != nil {
		t.Fatal(err)
	}
	states, err = util.ActiveToSingleStates(active)
	if err != nil {
		t.Fatal(err)
	}
	for unit := range states {
		if unit != "hello.service" {
			t.Error("Incorrect unit started:", unit)
		}
	}

}
Exemple #17
0
func TestScheduleGlobalUnits(t *testing.T) {
	// Create a three-member cluster
	cluster, err := platform.NewNspawnCluster("smoke")
	if err != nil {
		t.Fatal(err)
	}
	defer cluster.Destroy(t)
	numGUnits := 3
	numAllUnits := 5
	members, err := platform.CreateNClusterMembers(cluster, numGUnits)
	if err != nil {
		t.Fatal(err)
	}
	m0 := members[0]
	machines, err := cluster.WaitForNMachines(m0, numGUnits)
	if err != nil {
		t.Fatal(err)
	}

	// Launch a couple of simple units
	stdout, stderr, err := cluster.Fleetctl(m0, "start", "--no-block", "fixtures/units/hello.service", "fixtures/units/goodbye.service")
	if err != nil {
		t.Fatalf("Failed starting units: \nstdout: %s\nstderr: %s\nerr: %v", stdout, stderr, err)
	}

	// Both units should show up active
	_, err = cluster.WaitForNActiveUnits(m0, 2)
	if err != nil {
		t.Fatal(err)
	}

	// Now add a global unit
	globalLongPath := "fixtures/units/global.service"
	stdout, stderr, err = cluster.Fleetctl(m0, "start", "--no-block", globalLongPath)
	if err != nil {
		t.Fatalf("Failed starting unit: \nstdout: %s\nstderr: %s\nerr: %v", stdout, stderr, err)
	}

	// Should see 2 + 3 units
	states, err := cluster.WaitForNActiveUnits(m0, numAllUnits)
	if err != nil {
		t.Fatal(err)
	}

	// Each machine should have a single global unit
	globalBase := path.Base(globalLongPath)
	us := states[globalBase]
	for _, mach := range machines {
		var found bool
		for _, state := range us {
			if state.Machine == mach {
				found = true
				break
			}
		}
		if !found {
			t.Fatalf("Did not find global unit on machine %v", mach)
			t.Logf("Found unit states:")
			for _, state := range states {
				t.Logf("%#v", state)
			}
		}
	}

	stdout, stderr, err = cluster.Fleetctl(m0, "status", "--no-block", globalBase)
	if err != nil {
		t.Fatalf("Failed getting unit status: \nstdout: %s\nstderr: %s\nerr: %v", stdout, stderr, err)
	}

	// restarting global units
	cmd := "restart"
	stdout, stderr, err = cluster.Fleetctl(m0, cmd, globalBase)
	if err != nil {
		t.Fatalf("Failed restarting global unit: \nstdout: %s\nstderr: %s\nerr: %v", stdout, stderr, err)
	}

	outmap, err := waitForNUnitsStart(cluster, m0, numAllUnits)
	if err != nil {
		t.Fatalf("Failed listing global units: %v", err)
	}
	glist, _ := outmap[globalBase]
	if len(glist) != numGUnits {
		t.Fatalf("Did not find %d global units: got %d", numGUnits, len(glist))
	}
}
Exemple #18
0
// TestScheduleGlobalConflicts starts 2 global units that conflict with each
// other, and check if only the first one can be found.
func TestScheduleGlobalConflicts(t *testing.T) {
	// Create a three-member cluster
	cluster, err := platform.NewNspawnCluster("smoke")
	if err != nil {
		t.Fatal(err)
	}
	defer cluster.Destroy(t)
	members, err := platform.CreateNClusterMembers(cluster, 3)
	if err != nil {
		t.Fatal(err)
	}
	m0 := members[0]
	machines, err := cluster.WaitForNMachines(m0, 3)
	if err != nil {
		t.Fatal(err)
	}

	cfGlobal0 := "fixtures/units/conflict-global.0.service"
	cfGlobal1 := "fixtures/units/conflict-global.1.service"

	// Launch a global unit
	stdout, stderr, err := cluster.Fleetctl(m0, "start", "--no-block", cfGlobal0)
	if err != nil {
		t.Fatalf("Failed starting units: \nstdout: %s\nstderr: %s\nerr: %v", stdout, stderr, err)
	}

	// the global unit should show up active on 3 machines
	_, err = cluster.WaitForNActiveUnits(m0, 3)
	if err != nil {
		t.Fatal(err)
	}

	// Now add another global unit, which actually should not be started.
	stdout, stderr, err = cluster.Fleetctl(m0, "start", "--no-block", cfGlobal1)
	if err != nil {
		t.Fatalf("Failed starting unit: \nstdout: %s\nstderr: %s\nerr: %v", stdout, stderr, err)
	}

	// Should see only 3 units
	states, err := cluster.WaitForNActiveUnits(m0, 3)
	if err != nil {
		t.Fatal(err)
	}

	// Each machine should have a single global unit conflict-global.0.service,
	// but not conflict-global.1.service.
	us0 := states[path.Base(cfGlobal0)]
	us1 := states[path.Base(cfGlobal1)]
	for _, mach := range machines {
		var found bool
		for _, state := range us0 {
			if state.Machine == mach {
				found = true
				break
			}
		}
		if !found {
			t.Fatalf("Did not find global unit on machine %v", mach)
			t.Logf("Found unit states:")
			for _, state := range states {
				t.Logf("%#v", state)
			}
		}

		found = false
		for _, state := range us1 {
			if state.Machine == mach {
				found = true
				break
			}
		}
		if found {
			t.Fatalf("Did find global unit %s on machine %v", us1, mach)
			t.Logf("Global units were not conflicted as expected.")
		}
	}
}
Exemple #19
0
// Start three units using ssh tunnel
func TestTunnelScheduleBatchUnits(t *testing.T) {
	cluster, err := platform.NewNspawnCluster("smoke")
	if err != nil {
		t.Fatal(err)
	}
	defer cluster.Destroy(t)

	members, err := platform.CreateNClusterMembers(cluster, 3)
	if err != nil {
		t.Fatal(err)
	}
	m0 := members[0]
	_, err = cluster.WaitForNMachines(m0, 3)
	if err != nil {
		t.Fatal(err)
	}

	tmp, err := ioutil.TempFile(os.TempDir(), "known-hosts")
	if err != nil {
		t.Fatal(err)
	}
	tmp.Close()
	defer syscall.Unlink(tmp.Name())

	khFile := tmp.Name()

	// Launch one unit
	if stdout, stderr, err := cluster.FleetctlWithInput(m0, "yes",
		fmt.Sprintf("--tunnel=%s", m0.IP()),
		"--strict-host-key-checking=true",
		fmt.Sprintf("--known-hosts-file=%s", khFile),
		"start",
		"fixtures/units/hello.service"); err != nil {
		t.Fatalf("Unable to submit one unit using ssh tunnel: \nstdout: %s\nstderr: %s\nerr: %v", stdout, stderr, err)
	} else if strings.Contains(stderr, "Error") {
		t.Fatalf("Failed to correctly submit unit using ssh tunnel: \nstdout: %s\nstderr: %s\nerr: %v", stdout, stderr, err)
	}

	// Combine all parameters and units in one args slice
	args := []string{
		fmt.Sprintf("--tunnel=%s", m0.IP()),
		"--strict-host-key-checking=true",
		fmt.Sprintf("--known-hosts-file=%s", khFile),
		"start",
	}
	for i := 1; i <= 10; i++ {
		args = append(args, fmt.Sprintf("fixtures/units/hello@%d.service", i))
	}

	// Launch a batch of units
	if stdout, stderr, err := cluster.Fleetctl(m0, args...); err != nil {
		t.Fatalf("Unable to submit batch of units using ssh tunnel: \nstdout: %s\nstderr: %s\nerr: %v", stdout, stderr, err)
	} else if strings.Contains(stderr, "Error") {
		t.Fatalf("Failed to correctly submit batch of units using ssh tunnel: \nstdout: %s\nstderr: %s\nerr: %v", stdout, stderr, err)
	}

	_, err = cluster.WaitForNActiveUnits(m0, 11)
	if err != nil {
		t.Fatal(err)
	}
}
Exemple #20
0
// TestDetectMachineId checks for etcd registration failing on a duplicated
// machine-id on different machines.
// First it creates a cluster with 2 members, m0 and m1. Then make their
// machine IDs the same as each other, by explicitly setting the m1's ID to
// the same as m0's. Test succeeds when an error returns, while test fails
// when nothing happens.
func TestDetectMachineId(t *testing.T) {
	cluster, err := platform.NewNspawnCluster("smoke")
	if err != nil {
		t.Fatal(err)
	}
	defer cluster.Destroy(t)

	members, err := platform.CreateNClusterMembers(cluster, 2)
	if err != nil {
		t.Fatal(err)
	}

	m0 := members[0]
	m1 := members[1]
	_, err = cluster.WaitForNMachines(m0, 2)
	if err != nil {
		t.Fatal(err)
	}

	machineIdFile := "/etc/machine-id"

	// Restart fleet service, and check if its systemd status is still active.
	restartFleetService := func(m platform.Member) error {
		stdout, err := cluster.MemberCommand(m, "sudo", "systemctl", "restart", "fleet.service")
		if err != nil {
			return fmt.Errorf("Failed to restart fleet service\nstdout: %s\nerr: %v", stdout, err)
		}

		stdout, _ = cluster.MemberCommand(m, "systemctl", "show", "--property=ActiveState", "fleet")
		if strings.TrimSpace(stdout) != "ActiveState=active" {
			return fmt.Errorf("Fleet unit not reported as active: %s", stdout)
		}
		stdout, _ = cluster.MemberCommand(m, "systemctl", "show", "--property=Result", "fleet")
		if strings.TrimSpace(stdout) != "Result=success" {
			return fmt.Errorf("Result for fleet unit not reported as success: %s", stdout)
		}
		return nil
	}

	stdout, err := cluster.MemberCommand(m0, "cat", machineIdFile)
	if err != nil {
		t.Fatalf("Failed to get machine-id\nstdout: %s\nerr: %v", stdout, err)
	}
	m0_machine_id := strings.TrimSpace(stdout)

	// If the two machine IDs are different with each other,
	// set the m1's ID to the same one as m0, to intentionally
	// trigger an error case of duplication of machine ID.
	stdout, err = cluster.MemberCommand(m1,
		"echo", m0_machine_id, "|", "sudo", "tee", machineIdFile)
	if err != nil {
		t.Fatalf("Failed to replace machine-id\nstdout: %s\nerr: %v", stdout, err)
	}

	if err := restartFleetService(m1); err != nil {
		t.Fatal(err)
	}

	// fleetd should actually be running, but failing to list machines.
	// So we should expect a specific error after running fleetctl list-machines,
	// like "googlapi: Error 503: fleet server unable to communicate with etcd".
	stdout, stderr, err := cluster.Fleetctl(m1, "list-machines", "--no-legend")
	if err != nil {
		if !strings.Contains(err.Error(), "exit status 1") ||
			!strings.Contains(stderr, "fleet server unable to communicate with etcd") {
			t.Fatalf("m1: Failed to get list of machines. err: %v\nstderr: %s", err, stderr)
		}
		// If both conditions are satisfied, "exit status 1" and
		// "...unable to communicate...", then it's an expected error. PASS.
	} else {
		t.Fatalf("m1: should get an error, but got success.\nstderr: %s", stderr)
	}

	// Trigger another test case of m0's ID getting different from m1's.
	// Then it's expected that m0 and m1 would be working properly with distinct
	// machine IDs, after having restarted fleet.service both on m0 and m1.
	stdout, err = cluster.MemberCommand(m0,
		"echo", util.NewMachineID(), "|", "sudo", "tee", machineIdFile)
	if err != nil {
		t.Fatalf("m0: Failed to replace machine-id\nstdout: %s\nerr: %v", stdout, err)
	}

	// Restart fleet service on m0, and see that it's still working.
	if err := restartFleetService(m0); err != nil {
		t.Fatal(err)
	}

	stdout, stderr, err = cluster.Fleetctl(m0, "list-machines", "--no-legend")
	if err != nil {
		t.Fatalf("m0: error: %v\nstdout: %s\nstderr: %s", err, stdout, stderr)
	}
}
Exemple #21
0
// Start three pairs of services, asserting each pair land on the same
// machine due to the MachineOf options in the unit files.
func TestScheduleMachineOf(t *testing.T) {
	cluster, err := platform.NewNspawnCluster("smoke")
	if err != nil {
		t.Fatal(err)
	}
	defer cluster.Destroy(t)

	// Start with a simple three-node cluster
	members, err := platform.CreateNClusterMembers(cluster, 3)
	if err != nil {
		t.Fatal(err)
	}
	m0 := members[0]
	machines, err := cluster.WaitForNMachines(m0, 3)
	if err != nil {
		t.Fatal(err)
	}

	// Ensure we can SSH into each machine using fleetctl
	for _, machine := range machines {
		if stdout, stderr, err := cluster.Fleetctl(m0, "--strict-host-key-checking=false", "ssh", machine, "uptime"); err != nil {
			t.Errorf("Unable to SSH into fleet machine: \nstdout: %s\nstderr: %s\nerr: %v", stdout, stderr, err)
		}
	}

	// Start the 3 pairs of services
	for i := 0; i < 3; i++ {
		ping := fmt.Sprintf("fixtures/units/ping.%d.service", i)
		pong := fmt.Sprintf("fixtures/units/pong.%d.service", i)
		stdout, stderr, err := cluster.Fleetctl(m0, "start", "--no-block", ping, pong)
		if err != nil {
			t.Errorf("Failed starting units: \nstdout: %s\nstderr: %s\nerr: %v", stdout, stderr, err)
		}
	}

	// All 6 services should be visible immediately and become ACTIVE
	// shortly thereafter
	stdout, _, err := cluster.Fleetctl(m0, "list-unit-files", "--no-legend")
	if err != nil {
		t.Fatalf("Failed to run list-unit-files: %v", err)
	}
	units := strings.Split(strings.TrimSpace(stdout), "\n")
	if len(units) != 6 {
		t.Fatalf("Did not find six units in cluster: \n%s", stdout)
	}
	active, err := cluster.WaitForNActiveUnits(m0, 6)
	if err != nil {
		t.Fatal(err)
	}
	states, err := util.ActiveToSingleStates(active)
	if err != nil {
		t.Fatal(err)
	}

	for i := 0; i < 3; i++ {
		ping := fmt.Sprintf("ping.%d.service", i)
		pingState, ok := states[ping]
		if !ok {
			t.Errorf("Failed to find state for %s", ping)
			continue
		}

		pong := fmt.Sprintf("pong.%d.service", i)
		pongState, ok := states[pong]
		if !ok {
			t.Errorf("Failed to find state for %s", pong)
			continue
		}

		if len(pingState.Machine) == 0 {
			t.Errorf("Unit %s is not reporting machine", ping)
		}

		if len(pongState.Machine) == 0 {
			t.Errorf("Unit %s is not reporting machine", pong)
		}

		if pingState.Machine != pongState.Machine {
			t.Errorf("Units %s and %s are not on same machine", ping, pong)
		}
	}

	// Ensure a pair of units migrate together when their host goes down
	mach := states["ping.1.service"].Machine
	if _, _, err = cluster.Fleetctl(m0, "--strict-host-key-checking=false", "ssh", mach, "sudo", "systemctl", "stop", "fleet"); err != nil {
		t.Fatal(err)
	}

	var mN platform.Member
	if m0.ID() == states["ping.1.service"].Machine {
		mN = members[1]
	} else {
		mN = m0
	}

	if _, err := cluster.WaitForNMachines(mN, 2); err != nil {
		t.Fatal(err)
	}
	active, err = cluster.WaitForNActiveUnits(mN, 6)
	if err != nil {
		t.Fatal(err)
	}
	states, err = util.ActiveToSingleStates(active)
	if err != nil {
		t.Fatal(err)
	}

	newPingMach := states["ping.1.service"].Machine
	if mach == newPingMach {
		t.Fatalf("Unit ping.1.service did not appear to migrate")
	}

	newPongMach := states["pong.1.service"].Machine
	if newPingMach != newPongMach {
		t.Errorf("Unit pong.1.service did not migrate with ping.1.service")
	}
}
Exemple #22
0
// TestScheduleReplace starts 1 unit, followed by starting another unit
// that replaces the 1st unit. Then it verifies that the 2 units are
// started on different machines.
func TestScheduleReplace(t *testing.T) {
	cluster, err := platform.NewNspawnCluster("smoke")
	if err != nil {
		t.Fatal(err)
	}
	defer cluster.Destroy(t)

	// Start with a simple three-node cluster
	members, err := platform.CreateNClusterMembers(cluster, 2)
	if err != nil {
		t.Fatal(err)
	}
	m0 := members[0]
	if _, err := cluster.WaitForNMachines(m0, 2); err != nil {
		t.Fatal(err)
	}

	// Start a unit without Replaces
	uNames := []string{
		"fixtures/units/replace.0.service",
		"fixtures/units/replace.1.service",
	}
	if stdout, stderr, err := cluster.Fleetctl(m0, "start", "--no-block", uNames[0]); err != nil {
		t.Fatalf("Failed starting unit %s: \nstdout: %s\nstderr: %s\nerr: %v", uNames[0], stdout, stderr, err)
	}

	active, err := cluster.WaitForNActiveUnits(m0, 1)
	if err != nil {
		t.Fatal(err)
	}
	_, err = util.ActiveToSingleStates(active)
	if err != nil {
		t.Fatal(err)
	}

	// Start a unit that replaces the former one, replace.0.service
	if stdout, stderr, err := cluster.Fleetctl(m0, "start", "--no-block", uNames[1]); err != nil {
		t.Fatalf("Failed starting unit %s: \nstdout: %s\nstderr: %s\nerr: %v", uNames[1], stdout, stderr, err)
	}

	// Check that both units should show up
	stdout, _, err := cluster.Fleetctl(m0, "list-unit-files", "--no-legend")
	if err != nil {
		t.Fatalf("Failed to run list-unit-files: %v", err)
	}
	units := strings.Split(strings.TrimSpace(stdout), "\n")
	if len(units) != 2 {
		t.Fatalf("Did not find two units in cluster: \n%s", stdout)
	}
	active, err = cluster.WaitForNActiveUnits(m0, 2)
	if err != nil {
		t.Fatal(err)
	}
	states, err := util.ActiveToSingleStates(active)
	if err != nil {
		t.Fatal(err)
	}

	// Check that the unit 1 is located on a different machine from that of unit 0
	nUnits := 2
	uNameBase := make([]string, nUnits)
	machs := make([]string, nUnits)
	for i, uName := range uNames {
		uNameBase[i] = path.Base(uName)
		machs[i] = states[uNameBase[i]].Machine
	}
	if machs[0] == machs[1] {
		t.Fatalf("machine for %s is %s, the same as that of %s.", uNameBase[0], machs[0], uNameBase[1])
	}

	// Check that circular replaces end up with 1 launched unit.
	// First of all, stop the existing unit replace.0.service.
	if stdout, stderr, err := cluster.Fleetctl(m0, "destroy", uNameBase[0]); err != nil {
		t.Fatalf("Failed to destroy unit %s: \nstdout: %s\nstderr: %s\nerr: %v",
			uNameBase[0], stdout, stderr, err)
	}

	// Generate a new service 0 derived by a fixture, make the new service
	// replace service 1, and store it under /tmp.
	uName0tmp := path.Join("/tmp", uNameBase[0])
	err = util.GenNewFleetService(uName0tmp, uNames[1],
		"Replaces=replace.1.service", "Replaces=replace.0.service")
	if err != nil {
		t.Fatalf("Failed to generate a temp fleet service: %v", err)
	}

	// Start replace.0 unit that replaces replace.1.service,
	// then fleetctl list-unit-files should show only return 1 launched unit.
	// Note that we still need to run list-units once, before doing
	// list-unit-files, for reliable tests.
	stdout, stderr, err := cluster.Fleetctl(m0, "start", "--no-block", uName0tmp)
	if err != nil {
		t.Fatalf("Failed starting unit %s: \nstdout: %s\nstderr: %s\nerr: %v",
			uName0tmp, stdout, stderr, err)
	}

	stdout, _, err = cluster.Fleetctl(m0, "list-unit-files", "--no-legend")
	if err != nil {
		t.Fatalf("Failed to run list-unit-files: %v", err)
	}
	units = strings.Split(strings.TrimSpace(stdout), "\n")
	if len(units) != nUnits {
		t.Fatalf("Did not find two units in cluster: \n%s", stdout)
	}
	_, err = cluster.WaitForNActiveUnits(m0, nUnits)
	if err != nil {
		t.Fatal(err)
	}
	ufs, err := cluster.WaitForNUnitFiles(m0, nUnits)
	if err != nil {
		t.Fatalf("Failed to run list-unit-files: %v", err)
	}

	uStates := make([][]util.UnitFileState, nUnits)
	var found bool
	for i, unb := range uNameBase {
		uStates[i], found = ufs[unb]
		if len(ufs) != nUnits || !found {
			t.Fatalf("Did not find %d launched unit as expected: got %d\n", nUnits, len(ufs))
		}
	}
	nLaunched := 0
	for _, us := range uStates {
		for _, state := range us {
			if strings.Contains(state.State, "launched") {
				nLaunched += 1
			}
		}
	}
	if nLaunched != 1 {
		t.Fatalf("Did not find 1 launched unit as expected: got %d", nLaunched)
	}

	os.Remove(uName0tmp)
}
Exemple #23
0
// TestScheduleCircularReplace starts 2 units that tries to replace each other.
// Thus it's expected that only one of the units becomes active.
func TestScheduleCircularReplace(t *testing.T) {
	cluster, err := platform.NewNspawnCluster("smoke")
	if err != nil {
		t.Fatal(err)
	}
	defer cluster.Destroy(t)

	members, err := platform.CreateNClusterMembers(cluster, 2)
	if err != nil {
		t.Fatal(err)
	}
	m0 := members[0]
	if _, err := cluster.WaitForNMachines(m0, 2); err != nil {
		t.Fatal(err)
	}

	// Check that circular replaces end up with 1 launched unit.
	// To do that, generate a new service 0 that replaces service 1, and store
	// it under /tmp. Also store the original service 1 that replace 0.
	uNames := []string{
		"fixtures/units/replace.0.service",
		"fixtures/units/replace.1.service",
	}
	nUnits := 2
	nActiveUnits := 1
	uNameBase := make([]string, nUnits)
	for i, uName := range uNames {
		uNameBase[i] = path.Base(uName)
	}
	uName0tmp := path.Join("/tmp", uNameBase[0])
	err = util.GenNewFleetService(uName0tmp, uNames[1],
		"Replaces=replace.1.service", "Replaces=replace.0.service")
	if err != nil {
		t.Fatalf("Failed to generate a temp fleet service: %v", err)
	}

	// Start replace.0 unit that replaces replace.1.service,
	// then fleetctl list-unit-files should show only return 1 launched unit.
	stdout, stderr, err := cluster.Fleetctl(m0, "start", "--no-block", uName0tmp)
	if err != nil {
		t.Fatalf("Failed starting unit %s: \nstdout: %s\nstderr: %s\nerr: %v",
			uName0tmp, stdout, stderr, err)
	}

	stdout, stderr, err = cluster.Fleetctl(m0, "list-unit-files", "--no-legend")
	if err != nil {
		t.Fatalf("Failed to run list-unit-files:\nstdout: %s\nstderr: %s\nerr: %v", stdout, stderr, err)
	}
	units := strings.Split(strings.TrimSpace(stdout), "\n")
	if len(units) != nActiveUnits {
		t.Fatalf("Did not find two units in cluster: \n%s", stdout)
	}
	_, err = cluster.WaitForNActiveUnits(m0, nActiveUnits)
	if err != nil {
		t.Fatal(err)
	}
	ufs, err := cluster.WaitForNUnitFiles(m0, nActiveUnits)
	if err != nil {
		t.Fatalf("Failed to run list-unit-files: %v", err)
	}

	// Start replace.1 unit that replaces replace.0.service,
	// and then check that only 1 unit is active
	if stdout, stderr, err := cluster.Fleetctl(m0, "start", "--no-block", uNames[1]); err != nil {
		t.Fatalf("Failed starting unit %s: \nstdout: %s\nstderr: %s\nerr: %v", uNames[1], stdout, stderr, err)
	}
	stdout, stderr, err = cluster.Fleetctl(m0, "list-unit-files", "--no-legend")
	if err != nil {
		t.Fatalf("Failed to run list-unit-files:\nstdout: %s\nstderr: %s\nerr: %v", stdout, stderr, err)
	}
	units = strings.Split(strings.TrimSpace(stdout), "\n")
	if len(units) != nUnits {
		t.Fatalf("Did not find %d units in cluster: \n%s", nUnits, stdout)
	}

	active, err := cluster.WaitForNActiveUnits(m0, nActiveUnits)
	if err != nil {
		t.Fatal(err)
	}
	_, err = util.ActiveToSingleStates(active)
	if err != nil {
		t.Fatal(err)
	}

	uStates := make([][]util.UnitFileState, nUnits)
	for i, unb := range uNameBase {
		uStates[i], _ = ufs[unb]
	}
	nLaunched := 0
	for _, us := range uStates {
		for _, state := range us {
			if strings.Contains(state.State, "launched") {
				nLaunched += 1
			}
		}
	}
	if nLaunched != nActiveUnits {
		t.Fatalf("Did not find %d launched unit as expected: got %d", nActiveUnits, nLaunched)
	}

	os.Remove(uName0tmp)
}
Exemple #24
0
// TestScheduleReplace starts 3 units, followed by starting another unit
// that replaces the 1st unit. Then it verifies that the original unit
// got rescheduled on a different machine.
func TestScheduleReplace(t *testing.T) {
	cluster, err := platform.NewNspawnCluster("smoke")
	if err != nil {
		t.Fatal(err)
	}
	defer cluster.Destroy(t)

	members, err := platform.CreateNClusterMembers(cluster, 2)
	if err != nil {
		t.Fatal(err)
	}
	m0 := members[0]
	m1 := members[1]
	if _, err := cluster.WaitForNMachines(m0, 2); err != nil {
		t.Fatal(err)
	}

	// Start 3 units without Replaces, replace.0.service on m0, while both 1 and 2 on m1.
	// That's possible as replace.2.service has an option "MachineOf=replace.1.service".
	uNames := []string{
		"fixtures/units/replace.0.service",
		"fixtures/units/replace.1.service",
		"fixtures/units/replace.2.service",
		"fixtures/units/replace-kick0.service",
	}
	if stdout, stderr, err := cluster.Fleetctl(m0, "start", "--no-block", uNames[0]); err != nil {
		t.Fatalf("Failed starting unit %s: \nstdout: %s\nstderr: %s\nerr: %v", uNames[0], stdout, stderr, err)
	}
	if stdout, stderr, err := cluster.Fleetctl(m1, "start", "--no-block", uNames[1]); err != nil {
		t.Fatalf("Failed starting unit %s: \nstdout: %s\nstderr: %s\nerr: %v", uNames[1], stdout, stderr, err)
	}
	if stdout, stderr, err := cluster.Fleetctl(m1, "start", "--no-block", uNames[2]); err != nil {
		t.Fatalf("Failed starting unit %s: \nstdout: %s\nstderr: %s\nerr: %v", uNames[2], stdout, stderr, err)
	}

	active, err := cluster.WaitForNActiveUnits(m0, 3)
	if err != nil {
		t.Fatal(err)
	}
	states, err := util.ActiveToSingleStates(active)
	if err != nil {
		t.Fatal(err)
	}

	oldMach := states[path.Base(uNames[0])].Machine

	// Start a unit replace-kick0.service that replaces replace.0.service
	// Then the kick0 unit will be scheduled to m0, as m0 is least loaded than m1.
	// So it's possible to trigger a situation where kick0 could kick the original unit 0.
	if stdout, stderr, err := cluster.Fleetctl(m0, "start", "--no-block", uNames[3]); err != nil {
		t.Fatalf("Failed starting unit %s: \nstdout: %s\nstderr: %s\nerr: %v", uNames[3], stdout, stderr, err)
	}

	// Here we need to wait up to 15 seconds, to avoid races, because the unit state
	// publisher could otherwise report unit states with old machine IDs to registry.
	checkReplacedMachines := func() bool {
		// Check that 4 units show up
		nUnits := 4
		stdout, stderr, err := cluster.Fleetctl(m0, "list-unit-files", "--no-legend")
		if err != nil {
			t.Logf("Failed to run list-unit-files:\nstdout: %s\nstderr: %s\nerr: %v", stdout, stderr, err)
			return false
		}
		units := strings.Split(strings.TrimSpace(stdout), "\n")
		if len(units) != nUnits {
			t.Logf("Did not find two units in cluster: \n%s", stdout)
			return false
		}
		active, err = cluster.WaitForNActiveUnits(m0, nUnits)
		if err != nil {
			t.Log(err)
			return false
		}
		states, err = util.ActiveToSingleStates(active)
		if err != nil {
			t.Log(err)
			return false
		}

		// Check that replace.0.service is located on a different machine from
		// that of replace-kick0.service.
		uNameBase := make([]string, nUnits)
		machs := make([]string, nUnits)
		for i, uName := range uNames {
			uNameBase[i] = path.Base(uName)
			machs[i] = states[uNameBase[i]].Machine
		}
		if machs[0] == machs[3] {
			t.Logf("machine for %s is %s, the same as that of %s.", uNameBase[0], machs[0], uNameBase[3])
			return false
		}
		if machs[3] != oldMach {
			t.Logf("machine for %s is %s, different from old machine %s.", uNameBase[3], machs[3], oldMach)
			return false
		}
		if machs[0] == oldMach {
			t.Logf("machine for %s is %s, the same as that of %s.", uNameBase[0], machs[0], oldMach)
			return false
		}

		return true
	}
	if timeout, err := util.WaitForState(checkReplacedMachines); err != nil {
		t.Fatalf("Cannot verify replaced units within %v\nerr: %v", timeout, err)
	}
}
Exemple #25
0
// Load service and discovery units and test whether discovery unit adds itself as a dependency for the service.
func TestInstallUnit(t *testing.T) {
	cluster, err := platform.NewNspawnCluster("smoke")
	if err != nil {
		t.Fatal(err)
	}
	defer cluster.Destroy(t)

	// Start with a two-nodes cluster
	members, err := platform.CreateNClusterMembers(cluster, 2)
	if err != nil {
		t.Fatal(err)
	}
	m0 := members[0]
	_, err = cluster.WaitForNMachines(m0, 2)
	if err != nil {
		t.Fatal(err)
	}

	// Load unit files
	stdout, stderr, err := cluster.Fleetctl(m0, "load", "fixtures/units/hello.service", "fixtures/units/discovery.service")
	if err != nil {
		t.Fatalf("Failed loading unit files: \nstdout: %s\nstderr: %s\nerr: %v", stdout, stderr, err)
	}

	checkState := func(match string) bool {
		stdout, _, err := cluster.Fleetctl(m0, "--strict-host-key-checking=false", "ssh", "discovery.service", "systemctl show --property=ActiveState discovery.service")
		if err != nil {
			t.Logf("Failed getting info using remote systemctl: %v", err)
		}
		stdout = strings.TrimSpace(stdout)
		return stdout == fmt.Sprintf("ActiveState=%s", match)
	}

	// Verify that discovery.service unit is loaded but not started
	timeout, err := util.WaitForState(func() bool { return checkState("inactive") })
	if err != nil {
		t.Fatalf("discovery.service unit is not reported as inactive within %v: %v", timeout, err)
	}

	// Start hello.service unit
	stdout, stderr, err = cluster.Fleetctl(m0, "start", "fixtures/units/hello.service")
	if err != nil {
		t.Fatalf("Failed starting unit: \nstdout: %s\nstderr: %s\nerr: %v", stdout, stderr, err)
	}

	// Verify that discovery.service unit was started
	timeout, err = util.WaitForState(func() bool { return checkState("active") })
	if err != nil {
		t.Fatalf("discovery.service unit is not reported as active within %v:\n%v", timeout, err)
	}

	// Stop hello.service unit
	stdout, stderr, err = cluster.Fleetctl(m0, "stop", "fixtures/units/hello.service")
	if err != nil {
		t.Fatalf("Failed stopping unit: \nstdout: %s\nstderr: %s\nerr: %v", stdout, stderr, err)
	}

	// Verify that discovery.service unit was stopped
	timeout, err = util.WaitForState(func() bool { return checkState("inactive") })
	if err != nil {
		t.Fatalf("discovery.service unit is not reported as inactive within %v:\n%v", timeout, err)
	}
}