Example 1
// waitForFleetdSocket waits until /var/run/fleet.sock exists, periodically
// checking for the socket up to the timeout.
func waitForFleetdSocket(cluster platform.Cluster, m0 platform.Member) (err error) {
	_, err = util.WaitForState(
		func() bool {
			stdout, _, _ := cluster.MemberCommand(m0, "test -S /var/run/fleet.sock && echo 1")
			if strings.TrimSpace(stdout) == "" {
				fmt.Errorf("Fleetd is not fully started, retrying...")
				return false
			}
			return true
		},
	)
	if err != nil {
		return fmt.Errorf("Fleetd socket not found: %v", err)
	}

	return nil
}
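All of these helpers rely on util.WaitForState, which polls a predicate until it returns true or a timeout expires and reports that timeout back for use in error messages. A minimal sketch of such a polling helper, assuming a fixed default timeout and poll interval (the real util.WaitForState may differ):

package util

import (
	"fmt"
	"time"
)

// WaitForState polls checkState until it returns true or the timeout expires.
// The timeout is returned so callers can include it in their error messages.
// The timeout and interval values below are assumptions for this sketch.
func WaitForState(checkState func() bool) (time.Duration, error) {
	const (
		timeout  = 90 * time.Second
		interval = 250 * time.Millisecond
	)
	deadline := time.Now().Add(timeout)
	for time.Now().Before(deadline) {
		if checkState() {
			return timeout, nil
		}
		time.Sleep(interval)
	}
	return timeout, fmt.Errorf("timed out after %v", timeout)
}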
Example 2
// WaitForNUnitFiles runs fleetctl list-unit-files to verify that the actual
// number of unit files matches the given expected number. It periodically runs
// list-unit-files until the expected units actually show up.
func (nc *nspawnCluster) WaitForNUnitFiles(m Member, expectedUnits int) (map[string][]util.UnitFileState, error) {
	var nUnits int
	retStates := make(map[string][]util.UnitFileState)

	checkListUnitFiles := func() bool {
		outListUnitFiles, _, err := nc.Fleetctl(m, "list-unit-files", "--no-legend", "--full", "--fields", "unit,dstate,state")
		if err != nil {
			return false
		}
		// NOTE: There's no need to check whether outListUnitFiles is empty,
		// because ParseUnitFileStates() implicitly filters out such cases.
		// However, should ParseUnitFileStates() ever go away, we must not
		// forget about this special case.
		units := strings.Split(strings.TrimSpace(outListUnitFiles), "\n")
		allStates := util.ParseUnitFileStates(units)
		nUnits = len(allStates)
		if nUnits != expectedUnits {
			// retry until number of units matched
			return false
		}

		for _, state := range allStates {
			name := state.Name
			if _, ok := retStates[name]; !ok {
				retStates[name] = []util.UnitFileState{}
			}
			retStates[name] = append(retStates[name], state)
		}
		return true
	}

	timeout, err := util.WaitForState(checkListUnitFiles)
	if err != nil {
		return nil, fmt.Errorf("failed to find %d units within %v (last found: %d)",
			expectedUnits, timeout, nUnits)
	}

	return retStates, nil
}
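The NOTE above hinges on ParseUnitFileStates() skipping blank or malformed lines. A minimal sketch of such a parser, assuming the three selected columns map onto Name, DState and State fields (the real util.ParseUnitFileStates and util.UnitFileState may differ):

package util

import "strings"

// UnitFileState holds one row of "fleetctl list-unit-files --fields unit,dstate,state".
// The field names other than Name are assumptions for this sketch.
type UnitFileState struct {
	Name   string
	DState string
	State  string
}

// ParseUnitFileStates converts raw output lines into UnitFileState values,
// implicitly dropping blank or malformed lines.
func ParseUnitFileStates(lines []string) []UnitFileState {
	states := []UnitFileState{}
	for _, line := range lines {
		cols := strings.Fields(line)
		if len(cols) < 3 {
			// This is where the empty-output case mentioned above is filtered out.
			continue
		}
		states = append(states, UnitFileState{Name: cols[0], DState: cols[1], State: cols[2]})
	}
	return states
}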
Example 3
// waitForReloadConfig waits until the message "Reloading configuration" appears
// in the journal, periodically checking the journal up to the timeout.
func waitForReloadConfig(cluster platform.Cluster, m0 platform.Member) (err error) {
	_, err = util.WaitForState(
		func() bool {
			// NOTE: journalctl should be run simply as "journalctl -u fleet",
			// without being piped through grep. Running
			// "journalctl -u fleet | grep \"Reloading configuration\"" is racy
			// in a subtle way and sometimes fails only on semaphoreci.
			// - dpark 20160408
			stdout, _, _ := cluster.MemberCommand(m0, "sudo", "journalctl --priority=info _PID=$(pidof fleetd)")
			journalfleet := strings.TrimSpace(stdout)
			if !strings.Contains(journalfleet, "Reloading configuration") {
				fmt.Errorf("Fleetd is not fully reconfigured, retrying... entire fleet journal:\n%v", journalfleet)
				return false
			}
			return true
		},
	)
	if err != nil {
		return fmt.Errorf("Reloading configuration log not found: %v", err)
	}

	return nil
}
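A hypothetical caller inside a functional test (with a *testing.T named t, a cluster, and a member m0) might pair this helper with a configuration reload; the SIGHUP step below is an assumption for illustration and is not taken from the original tests:

	// Hypothetical usage: ask fleetd to reload its configuration, then wait
	// for the "Reloading configuration" journal entry to show up.
	if _, _, err := cluster.MemberCommand(m0, "sudo", "systemctl", "kill", "-s", "SIGHUP", "fleet"); err != nil {
		t.Fatal(err)
	}
	if err := waitForReloadConfig(cluster, m0); err != nil {
		t.Fatal(err)
	}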
Example 4
// WaitForNActiveUnits runs fleetctl list-units and waits until exactly count
// active units are reported, returning their states grouped by unit name.
func (nc *nspawnCluster) WaitForNActiveUnits(m Member, count int) (map[string][]util.UnitState, error) {
	var nactive int
	states := make(map[string][]util.UnitState)

	timeout, err := util.WaitForState(
		func() bool {
			stdout, _, err := nc.Fleetctl(m, "list-units", "--no-legend", "--full", "--fields", "unit,active,machine")
			stdout = strings.TrimSpace(stdout)
			if err != nil {
				return false
			}

			lines := strings.Split(stdout, "\n")
			allStates := util.ParseUnitStates(lines)
			active := util.FilterActiveUnits(allStates)
			nactive = len(active)
			if nactive != count {
				return false
			}

			for _, state := range active {
				name := state.Name
				if _, ok := states[name]; !ok {
					states[name] = []util.UnitState{}
				}
				states[name] = append(states[name], state)
			}
			return true
		},
	)
	if err != nil {
		return nil, fmt.Errorf("failed to find %d active units within %v (last found: %d)", count, timeout, nactive)
	}

	return states, nil
}
Example 5
func TestScheduleOneWayConflict(t *testing.T) {
	cluster, err := platform.NewNspawnCluster("smoke")
	if err != nil {
		t.Fatal(err)
	}
	defer cluster.Destroy(t)

	// Start with a simple single-node cluster
	members, err := platform.CreateNClusterMembers(cluster, 1)
	if err != nil {
		t.Fatal(err)
	}
	m0 := members[0]
	if _, err := cluster.WaitForNMachines(m0, 1); err != nil {
		t.Fatal(err)
	}

	// Start a unit that conflicts with a yet-to-be-scheduled unit
	name := "fixtures/units/conflicts-with-hello.service"
	if stdout, stderr, err := cluster.Fleetctl(m0, "start", "--no-block", name); err != nil {
		t.Fatalf("Failed starting unit %s: \nstdout: %s\nstderr: %s\nerr: %v", name, stdout, stderr, err)
	}

	active, err := cluster.WaitForNActiveUnits(m0, 1)
	if err != nil {
		t.Fatal(err)
	}
	states, err := util.ActiveToSingleStates(active)
	if err != nil {
		t.Fatal(err)
	}

	// Start a unit that has no conflicts defined
	name = "fixtures/units/hello.service"
	if stdout, stderr, err := cluster.Fleetctl(m0, "start", "--no-block", name); err != nil {
		t.Fatalf("Failed starting unit %s: \nstdout: %s\nstderr: %s\nerr: %v", name, stdout, stderr, err)
	}

	// Both units should show up, but only conflicts-with-hello.service
	// should report ACTIVE
	stdout, _, err := cluster.Fleetctl(m0, "list-unit-files", "--no-legend")
	if err != nil {
		t.Fatalf("Failed to run list-unit-files: %v", err)
	}
	units := strings.Split(strings.TrimSpace(stdout), "\n")
	if len(units) != 2 {
		t.Fatalf("Did not find two units in cluster: \n%s", stdout)
	}
	active, err = cluster.WaitForNActiveUnits(m0, 1)
	if err != nil {
		t.Fatal(err)
	}
	states, err = util.ActiveToSingleStates(active)
	if err != nil {
		t.Fatal(err)
	}

	for unit := range states {
		if unit != "conflicts-with-hello.service" {
			t.Error("Incorrect unit started:", unit)
		}
	}

	// Destroying the conflicting unit should allow the other to start
	name = "conflicts-with-hello.service"
	if _, _, err := cluster.Fleetctl(m0, "destroy", name); err != nil {
		t.Fatalf("Failed destroying %s", name)
	}

	// NOTE: we need to sleep briefly here to avoid occasional errors where
	// conflicts-with-hello.service gets rescheduled even after being destroyed.
	// In that case the conflicting unit stays active while the original
	// hello.service stays inactive, and TestScheduleOneWayConflict then fails
	// at the end with the message "Incorrect unit started".
	// This error seems to occur frequently when enable_grpc is turned on.
	// - dpark 20160615
	time.Sleep(1 * time.Second)

	// Wait for the destroyed unit to actually disappear
	timeout, err := util.WaitForState(
		func() bool {
			stdout, _, err := cluster.Fleetctl(m0, "list-units", "--no-legend", "--full", "--fields", "unit,active,machine")
			if err != nil {
				return false
			}
			lines := strings.Split(strings.TrimSpace(stdout), "\n")
			states := util.ParseUnitStates(lines)
			for _, state := range states {
				if state.Name == name {
					return false
				}
			}
			return true
		},
	)
	if err != nil {
		t.Fatalf("Destroyed unit %s not gone within %v", name, timeout)
	}

	active, err = cluster.WaitForNActiveUnits(m0, 1)
	if err != nil {
		t.Fatal(err)
	}
	states, err = util.ActiveToSingleStates(active)
	if err != nil {
		t.Fatal(err)
	}
	for unit := range states {
		if unit != "hello.service" {
			t.Error("Incorrect unit started:", unit)
		}
	}

}
Example 6
// Check that units states do not change on loss of connectivity to etcd.
//
// Note: this only tests the behaviour of the disconnected node,
// not the reaction of the rest of the cluster,
// nor the reconciliation after connectivity is restored.
func TestSingleNodeConnectivityLoss(t *testing.T) {
	cluster, err := platform.NewNspawnCluster("smoke")
	if err != nil {
		t.Fatal(err)
	}
	defer cluster.Destroy(t)

	m0, err := cluster.CreateMember()
	if err != nil {
		t.Fatal(err)
	}
	_, err = cluster.WaitForNMachines(m0, 1)
	if err != nil {
		t.Fatal(err)
	}

	// Set up some units.
	stateMapping := map[string]struct {
		command          []string
		runState         string
		systemdFileState string
		systemdState     []string
	}{
		"inactive": {[]string{"submit"}, "", "", nil},
		"loaded":   {[]string{"load", "--no-block"}, "inactive", "linked-runtime", nil},
		"launched": {[]string{"start", "--no-block"}, "active", "linked-runtime", []string{"loaded", "active", "running"}},
	}
	createUnits := map[string][]string{}
	expectedUnitFiles := map[string]string{}
	expectedUnitStates := map[string]string{}
	expectedSystemdFiles := map[string]string{}
	expectedSystemdStates := map[string][]string{}
	for _, service := range []string{"single", "global"} {
		for state, mapping := range stateMapping {
			unitName := fmt.Sprintf("%s@%s.service", service, state)
			unitPath := fmt.Sprintf("fixtures/units/%s", unitName)
			createUnits[unitName] = append(mapping.command, unitPath)

			expectedUnitFiles[unitName] = state

			if mapping.runState != "" {
				expectedUnitStates[unitName] = mapping.runState
			}

			if mapping.systemdFileState != "" {
				expectedSystemdFiles[unitName] = mapping.systemdFileState
			}

			if mapping.systemdState != nil {
				expectedSystemdStates[unitName] = mapping.systemdState
			}
		}
	}
	for name, command := range createUnits {
		stdout, stderr, err := cluster.Fleetctl(m0, command...)
		if err != nil {
			t.Fatalf("Failed creating unit %s: %v\nstdout: %s\nstderr:%s", name, err, stdout, stderr)
		}
	}

	checkExpectedStates := func() (isExpected bool, expected, actual map[string]string) {
		// First check unit files.
		// These shouldn't change at all after the initial submit -- but better safe than sorry...
		stdout, _, err := cluster.Fleetctl(m0, "list-unit-files", "--no-legend", "--full", "--fields", "unit,dstate")
		if err != nil {
			t.Errorf("Failed listing unit files: %v", err)
		}
		stdout = strings.TrimSpace(stdout)

		lines := strings.Split(stdout, "\n")
		actualUnitFiles := map[string]string{}
		if stdout != "" {
			for _, line := range lines {
				cols := strings.Fields(line)
				actualUnitFiles[cols[0]] = cols[1]
			}
		}

		if !reflect.DeepEqual(actualUnitFiles, expectedUnitFiles) {
			return false, expectedUnitFiles, actualUnitFiles
		}

		// Now check the actual unit states.
		stdout, _, err = cluster.Fleetctl(m0, "list-units", "--no-legend", "--full", "--fields", "unit,active")
		if err != nil {
			t.Errorf("Failed listing units: %v", err)
		}
		stdout = strings.TrimSpace(stdout)

		lines = strings.Split(stdout, "\n")
		actualUnitStates := map[string]string{}
		if stdout != "" {
			for _, line := range lines {
				cols := strings.Fields(line)
				actualUnitStates[cols[0]] = cols[1]
			}
		}

		return reflect.DeepEqual(actualUnitStates, expectedUnitStates), expectedUnitStates, actualUnitStates
	}

	// Wait for the initial state to be reached.
	timeout, err := util.WaitForState(
		func() bool { isExpected, _, _ := checkExpectedStates(); return isExpected },
	)
	if err != nil {
		t.Fatalf("Failed to reach expected initial state within %v.", timeout)
	}

	// Cut connection to etcd.
	//
	// We use REJECT here, so fleet knows immediately that it's disconnected, rather than waiting for a timeout.
	if _, err = cluster.MemberCommand(m0, "sudo", "iptables", "-I", "OUTPUT", "-p", "tcp", "-m", "multiport", "--dports=2379,4001", "-j", "REJECT"); err != nil {
		t.Fatal(err)
	}

	// Wait long enough to be reasonably confident that no more state changes will happen.
	ttl, _ := time.ParseDuration(util.FleetTTL)
	agentReconcileInterval := 5 * time.Second
	slack := 2 * time.Second

	time.Sleep(ttl + agentReconcileInterval + slack)

	// Check unit state after connection loss.
	//
	// Note: we cannot use fleetctl to check the state here,
	// as fleet is not available to give us this information...
	// We have to go deeper, and try to obtain the information from systemd directly.
	actualSystemdFiles := map[string]string{}
	var stdout string
	for name := range expectedSystemdFiles {
		stdout, _ := cluster.MemberCommand(m0, "systemctl", "is-enabled", name)
		// do not check for error, as systemctl is-enabled returns exit status 1 for linked-runtime.
		stdout = strings.TrimSpace(stdout)
		if stdout == "" {
			continue
		}
		actualSystemdFiles[name] = strings.Split(stdout, "\n")[0]
	}

	if !reflect.DeepEqual(actualSystemdFiles, expectedSystemdFiles) {
		t.Fatalf("Units files not in expected state after losing connectivity.\nExpected: %v\nActual: %v", expectedSystemdFiles, actualSystemdFiles)
	}

	stdout, err = cluster.MemberCommand(m0, "systemctl", "list-units", "-t", "service", "--no-legend", "single@*.service", "global@*.service")
	if err != nil {
		t.Fatalf("Failed to retrieve systemd unit states: %v", err)
	}
	stdout = strings.TrimSpace(stdout)
	actualSystemdStates := map[string][]string{}
	if stdout != "" {
		for _, line := range strings.Split(stdout, "\n") {
			fields := strings.Fields(line)
			actualSystemdStates[fields[0]] = fields[1:4]
		}
	}
	if !reflect.DeepEqual(actualSystemdStates, expectedSystemdStates) {
		t.Fatalf("Units not in expected state after losing connectivity.\nExpected: %v\nActual: %v", expectedSystemdStates, actualSystemdStates)
	}

	// Restore etcd connection.
	if _, err = cluster.MemberCommand(m0, "sudo", "iptables", "-D", "OUTPUT", "-p", "tcp", "-m", "multiport", "--dports=2379,4001", "-j", "REJECT"); err != nil {
		t.Fatal(err)
	}

	// Again, wait long enough to be reasonably confident that no more state changes will happen.
	//
	// Here this should cover the time for fleet to realise connectivity is back,
	// and for the Agent to complete the second run after reconnection.
	//
	// (Unlike for the first run immediately after connectivity is back, by the time of the second run,
	// Engine leadership and Engine reconciliation should have been sorted out,
	// and everything should be back to normal...)
	time.Sleep(ttl + agentReconcileInterval + slack)

	// Check state after reconnect.
	var expected, actual map[string]string
	var isExpected bool
	timeout, err = util.WaitForState(
		func() bool { isExpected, expected, actual = checkExpectedStates(); return isExpected },
	)
	if err != nil {
		t.Fatalf("Failed to reach expected initial state within %v.", timeout)
	}
	if !isExpected {
		t.Fatalf("Units not in expected state after restoring connectivity.\nExpected: %v\nActual: %v", expected, actual)
	}

	// Additionally check the logs of all active units for possible temporary state flapping.
	stdout, err = cluster.MemberCommand(m0, "journalctl", "_PID=1")
	if err != nil {
		t.Fatalf("Failed to retrieve journal: %v", err)
	}
	if strings.Contains(stdout, "Stopping single@") || strings.Contains(stdout, "Stopping global@") {
		t.Fatalf("Units were unexpectedly stopped at some point:\n%s", stdout)
	}
}
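The connectivity cut and restore commands above are repeated verbatim; a hypothetical helper (not part of the original test, and assuming the same two-value MemberCommand signature used in this test) could wrap them, keeping in one place the choice of REJECT over DROP so that fleet notices the disconnect immediately instead of waiting for a TCP timeout:

// setEtcdBlocked inserts or deletes the iptables rule that rejects outgoing
// traffic to the etcd client ports 2379 and 4001.
func setEtcdBlocked(cluster platform.Cluster, m platform.Member, blocked bool) error {
	action := "-D"
	if blocked {
		action = "-I"
	}
	_, err := cluster.MemberCommand(m, "sudo", "iptables", action, "OUTPUT",
		"-p", "tcp", "-m", "multiport", "--dports=2379,4001", "-j", "REJECT")
	return err
}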
Example 7
func TestScheduleOneWayConflict(t *testing.T) {
	cluster, err := platform.NewNspawnCluster("smoke")
	if err != nil {
		t.Fatal(err)
	}
	defer cluster.Destroy(t)

	// Start with a simple single-node cluster
	members, err := platform.CreateNClusterMembers(cluster, 1)
	if err != nil {
		t.Fatal(err)
	}
	m0 := members[0]
	if _, err := cluster.WaitForNMachines(m0, 1); err != nil {
		t.Fatal(err)
	}

	// Start a unit that conflicts with a yet-to-be-scheduled unit
	name := "fixtures/units/conflicts-with-hello.service"
	if stdout, stderr, err := cluster.Fleetctl(m0, "start", "--no-block", name); err != nil {
		t.Fatalf("Failed starting unit %s: \nstdout: %s\nstderr: %s\nerr: %v", name, stdout, stderr, err)
	}

	active, err := cluster.WaitForNActiveUnits(m0, 1)
	if err != nil {
		t.Fatal(err)
	}
	states, err := util.ActiveToSingleStates(active)
	if err != nil {
		t.Fatal(err)
	}

	// Start a unit that has no conflicts defined
	name = "fixtures/units/hello.service"
	if stdout, stderr, err := cluster.Fleetctl(m0, "start", "--no-block", name); err != nil {
		t.Fatalf("Failed starting unit %s: \nstdout: %s\nstderr: %s\nerr: %v", name, stdout, stderr, err)
	}

	// Both units should show up, but only conflicts-with-hello.service
	// should report ACTIVE
	stdout, _, err := cluster.Fleetctl(m0, "list-unit-files", "--no-legend")
	if err != nil {
		t.Fatalf("Failed to run list-unit-files: %v", err)
	}
	units := strings.Split(strings.TrimSpace(stdout), "\n")
	if len(units) != 2 {
		t.Fatalf("Did not find two units in cluster: \n%s", stdout)
	}
	active, err = cluster.WaitForNActiveUnits(m0, 1)
	if err != nil {
		t.Fatal(err)
	}
	states, err = util.ActiveToSingleStates(active)
	if err != nil {
		t.Fatal(err)
	}

	for unit := range states {
		if unit != "conflicts-with-hello.service" {
			t.Error("Incorrect unit started:", unit)
		}
	}

	// Destroying the conflicting unit should allow the other to start
	name = "conflicts-with-hello.service"
	if _, _, err := cluster.Fleetctl(m0, "destroy", name); err != nil {
		t.Fatalf("Failed destroying %s", name)
	}

	// Wait for the destroyed unit to actually disappear
	timeout, err := util.WaitForState(
		func() bool {
			stdout, _, err := cluster.Fleetctl(m0, "list-units", "--no-legend", "--full", "--fields", "unit,active,machine")
			if err != nil {
				return false
			}
			lines := strings.Split(strings.TrimSpace(stdout), "\n")
			states := util.ParseUnitStates(lines)
			for _, state := range states {
				if state.Name == name {
					return false
				}
			}
			return true
		},
	)
	if err != nil {
		t.Fatalf("Destroyed unit %s not gone within %v", name, timeout)
	}

	active, err = cluster.WaitForNActiveUnits(m0, 1)
	if err != nil {
		t.Fatal(err)
	}
	states, err = util.ActiveToSingleStates(active)
	if err != nil {
		t.Fatal(err)
	}
	for unit := range states {
		if unit != "hello.service" {
			t.Error("Incorrect unit started:", unit)
		}
	}

}
Example 8
// TestReplaceSerialization tests whether the ExecStartPre of the new version
// of a unit is executed after the ExecStopPost of the old version when the
// new unit replaces the old one.
// This test makes sure that two versions of the same unit do not conflict
// with each other and that the directives are always serialized; it does its
// best to avoid the following scenarios:
// https://github.com/coreos/fleet/issues/1000
// https://github.com/systemd/systemd/issues/518
// We cannot guarantee that this behaviour will never be triggered by some
// other external operation, but at least within the context of the unit
// replace feature we try to avoid it.
func TestReplaceSerialization(t *testing.T) {
	cluster, err := platform.NewNspawnCluster("smoke")
	if err != nil {
		t.Fatal(err)
	}
	defer cluster.Destroy(t)

	m, err := cluster.CreateMember()
	if err != nil {
		t.Fatal(err)
	}

	_, err = cluster.WaitForNMachines(m, 1)
	if err != nil {
		t.Fatal(err)
	}

	tmpSyncFile := "/tmp/fleetSyncReplaceFile"
	syncOld := "echo 'sync'"
	syncNew := fmt.Sprintf("test -f %s", tmpSyncFile)
	tmpSyncService := "/tmp/replace-sync.service"
	syncService := "fixtures/units/replace-sync.service"

	stdout, stderr, err := cluster.Fleetctl(m, "start", syncService)
	if err != nil {
		t.Fatalf("Unable to start unit: \nstdout: %s\nstderr: %s\nerr: %v", stdout, stderr, err)
	}

	_, err = cluster.WaitForNActiveUnits(m, 1)
	if err != nil {
		t.Fatal(err)
	}

	// Replace the unit content and make sure that the new version shows up
	// and that it ran 'test -f /tmp/fleetSyncReplaceFile' correctly.
	err = util.GenNewFleetService(tmpSyncService, syncService, syncNew, syncOld)
	if err != nil {
		t.Fatalf("Failed to generate a temp fleet service: %v", err)
	}

	stdout, stderr, err = cluster.Fleetctl(m, "start", "--replace", tmpSyncService)
	if err != nil {
		t.Fatalf("Failed to replace unit: \nstdout: %s\nstderr: %s\nerr: %v", stdout, stderr, err)
	}

	_, err = cluster.WaitForNActiveUnits(m, 1)
	if err != nil {
		t.Fatalf("Did not find 1 unit in cluster, unit replace failed: %v", err)
	}

	// Wait for the sync file. If the sync file was not created, the previous
	// unit failed; if it was created, we continue. At this point the new
	// version of the unit is probably already running and its ExecStartPre is
	// running at the same time; if it failed, we will probably catch that
	// later when we check its status.
	tmpService := path.Base(tmpSyncService)
	timeout, err := util.WaitForState(
		func() bool {
			_, err = cluster.MemberCommand(m, syncNew)
			if err != nil {
				return false
			}
			return true
		},
	)
	if err != nil {
		t.Fatalf("Failed to check if file %s exists within %v", tmpSyncFile, timeout)
	}

	timeout, err = util.WaitForState(
		func() bool {
			stdout, _ = cluster.MemberCommand(m, "systemctl", "show", "--property=ActiveState", tmpService)
			if strings.TrimSpace(stdout) != "ActiveState=active" {
				return false
			}
			return true
		},
	)
	if err != nil {
		t.Fatalf("%s unit not reported as active within %v", tmpService, timeout)
	}

	timeout, err = util.WaitForState(
		func() bool {
			stdout, _ = cluster.MemberCommand(m, "systemctl", "show", "--property=Result", tmpService)
			if strings.TrimSpace(stdout) != "Result=success" {
				return false
			}
			return true
		},
	)
	if err != nil {
		t.Fatalf("Result for %s unit not reported as success withing %v", tmpService, timeout)
	}

	os.Remove(tmpSyncFile)
	os.Remove(tmpSyncService)
}
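For reference, a minimal sketch of what a helper like util.GenNewFleetService might do, assuming it copies the source unit file and swaps one command string for another (the argument order follows the call above; the real implementation may differ):

package util

import (
	"io/ioutil"
	"strings"
)

// GenNewFleetService writes a copy of the unit file at src to dst, replacing
// every occurrence of oldCmd with newCmd. The body is an assumption for this sketch.
func GenNewFleetService(dst, src, newCmd, oldCmd string) error {
	data, err := ioutil.ReadFile(src)
	if err != nil {
		return err
	}
	out := strings.Replace(string(data), oldCmd, newCmd, -1)
	return ioutil.WriteFile(dst, []byte(out), 0644)
}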
Example 9
// Load the service and discovery units and test whether the discovery unit adds itself as a dependency of the service.
func TestInstallUnit(t *testing.T) {
	cluster, err := platform.NewNspawnCluster("smoke")
	if err != nil {
		t.Fatal(err)
	}
	defer cluster.Destroy(t)

	// Start with a two-node cluster
	members, err := platform.CreateNClusterMembers(cluster, 2)
	if err != nil {
		t.Fatal(err)
	}
	m0 := members[0]
	_, err = cluster.WaitForNMachines(m0, 2)
	if err != nil {
		t.Fatal(err)
	}

	// Load unit files
	stdout, stderr, err := cluster.Fleetctl(m0, "load", "fixtures/units/hello.service", "fixtures/units/discovery.service")
	if err != nil {
		t.Fatalf("Failed loading unit files: \nstdout: %s\nstderr: %s\nerr: %v", stdout, stderr, err)
	}

	checkState := func(match string) bool {
		stdout, _, err := cluster.Fleetctl(m0, "--strict-host-key-checking=false", "ssh", "discovery.service", "systemctl show --property=ActiveState discovery.service")
		if err != nil {
			t.Logf("Failed getting info using remote systemctl: %v", err)
		}
		stdout = strings.TrimSpace(stdout)
		return stdout == fmt.Sprintf("ActiveState=%s", match)
	}

	// Verify that discovery.service unit is loaded but not started
	timeout, err := util.WaitForState(func() bool { return checkState("inactive") })
	if err != nil {
		t.Fatalf("discovery.service unit is not reported as inactive within %v: %v", timeout, err)
	}

	// Start hello.service unit
	stdout, stderr, err = cluster.Fleetctl(m0, "start", "fixtures/units/hello.service")
	if err != nil {
		t.Fatalf("Failed starting unit: \nstdout: %s\nstderr: %s\nerr: %v", stdout, stderr, err)
	}

	// Verify that discovery.service unit was started
	timeout, err = util.WaitForState(func() bool { return checkState("active") })
	if err != nil {
		t.Fatalf("discovery.service unit is not reported as active within %v:\n%v", timeout, err)
	}

	// Stop hello.service unit
	stdout, stderr, err = cluster.Fleetctl(m0, "stop", "fixtures/units/hello.service")
	if err != nil {
		t.Fatalf("Failed stopping unit: \nstdout: %s\nstderr: %s\nerr: %v", stdout, stderr, err)
	}

	// Verify that discovery.service unit was stopped
	timeout, err = util.WaitForState(func() bool { return checkState("inactive") })
	if err != nil {
		t.Fatalf("discovery.service unit is not reported as inactive within %v:\n%v", timeout, err)
	}
}
Example 10
// TestScheduleReplace starts 3 units, followed by starting another unit
// that replaces the 1st unit. Then it verifies that the original unit
// got rescheduled on a different machine.
func TestScheduleReplace(t *testing.T) {
	cluster, err := platform.NewNspawnCluster("smoke")
	if err != nil {
		t.Fatal(err)
	}
	defer cluster.Destroy(t)

	members, err := platform.CreateNClusterMembers(cluster, 2)
	if err != nil {
		t.Fatal(err)
	}
	m0 := members[0]
	m1 := members[1]
	if _, err := cluster.WaitForNMachines(m0, 2); err != nil {
		t.Fatal(err)
	}

	// Start 3 units without Replaces: replace.0.service on m0, and both 1 and 2 on m1.
	// That's possible because replace.2.service has the option "MachineOf=replace.1.service".
	uNames := []string{
		"fixtures/units/replace.0.service",
		"fixtures/units/replace.1.service",
		"fixtures/units/replace.2.service",
		"fixtures/units/replace-kick0.service",
	}
	if stdout, stderr, err := cluster.Fleetctl(m0, "start", "--no-block", uNames[0]); err != nil {
		t.Fatalf("Failed starting unit %s: \nstdout: %s\nstderr: %s\nerr: %v", uNames[0], stdout, stderr, err)
	}
	if stdout, stderr, err := cluster.Fleetctl(m1, "start", "--no-block", uNames[1]); err != nil {
		t.Fatalf("Failed starting unit %s: \nstdout: %s\nstderr: %s\nerr: %v", uNames[1], stdout, stderr, err)
	}
	if stdout, stderr, err := cluster.Fleetctl(m1, "start", "--no-block", uNames[2]); err != nil {
		t.Fatalf("Failed starting unit %s: \nstdout: %s\nstderr: %s\nerr: %v", uNames[2], stdout, stderr, err)
	}

	active, err := cluster.WaitForNActiveUnits(m0, 3)
	if err != nil {
		t.Fatal(err)
	}
	states, err := util.ActiveToSingleStates(active)
	if err != nil {
		t.Fatal(err)
	}

	oldMach := states[path.Base(uNames[0])].Machine

	// Start a unit replace-kick0.service that replaces replace.0.service.
	// The kick0 unit will then be scheduled to m0, as m0 is less loaded than m1.
	// That makes it possible to trigger the situation where kick0 kicks the original unit 0.
	if stdout, stderr, err := cluster.Fleetctl(m0, "start", "--no-block", uNames[3]); err != nil {
		t.Fatalf("Failed starting unit %s: \nstdout: %s\nstderr: %s\nerr: %v", uNames[3], stdout, stderr, err)
	}

	// Here we need to wait up to 15 seconds to avoid races, because the unit state
	// publisher could otherwise report unit states with old machine IDs to the registry.
	checkReplacedMachines := func() bool {
		// Check that 4 units show up
		nUnits := 4
		stdout, stderr, err := cluster.Fleetctl(m0, "list-unit-files", "--no-legend")
		if err != nil {
			t.Logf("Failed to run list-unit-files:\nstdout: %s\nstderr: %s\nerr: %v", stdout, stderr, err)
			return false
		}
		units := strings.Split(strings.TrimSpace(stdout), "\n")
		if len(units) != nUnits {
			t.Logf("Did not find two units in cluster: \n%s", stdout)
			return false
		}
		active, err = cluster.WaitForNActiveUnits(m0, nUnits)
		if err != nil {
			t.Log(err)
			return false
		}
		states, err = util.ActiveToSingleStates(active)
		if err != nil {
			t.Log(err)
			return false
		}

		// Check that replace.0.service is located on a different machine from
		// that of replace-kick0.service.
		uNameBase := make([]string, nUnits)
		machs := make([]string, nUnits)
		for i, uName := range uNames {
			uNameBase[i] = path.Base(uName)
			machs[i] = states[uNameBase[i]].Machine
		}
		if machs[0] == machs[3] {
			t.Logf("machine for %s is %s, the same as that of %s.", uNameBase[0], machs[0], uNameBase[3])
			return false
		}
		if machs[3] != oldMach {
			t.Logf("machine for %s is %s, different from old machine %s.", uNameBase[3], machs[3], oldMach)
			return false
		}
		if machs[0] == oldMach {
			t.Logf("machine for %s is %s, the same as that of %s.", uNameBase[0], machs[0], oldMach)
			return false
		}

		return true
	}
	if timeout, err := util.WaitForState(checkReplacedMachines); err != nil {
		t.Fatalf("Cannot verify replaced units within %v\nerr: %v", timeout, err)
	}
}