Beispiel #1
0
// TestChainingMixed uses a combination of strings and tuples in its
// "output" data.
func TestChainingMixed(t *testing.T) {
	var (
		one, two coordinate.WorkSpec
		attempt  coordinate.Attempt
		units    map[string]coordinate.WorkUnit
		err      error
	)

	sts := SimpleTestSetup{
		NamespaceName: "TestChainingMixed",
		WorkerName:    "worker",
	}
	sts.SetUp(t)
	defer sts.TearDown(t)

	one, err = sts.Namespace.SetWorkSpec(map[string]interface{}{
		"name": "one",
		"then": "two",
	})
	if !assert.NoError(t, err) {
		return
	}

	two, err = sts.Namespace.SetWorkSpec(map[string]interface{}{
		"name": "two",
	})
	if !assert.NoError(t, err) {
		return
	}

	_, err = one.AddWorkUnit("a", map[string]interface{}{}, coordinate.WorkUnitMeta{})
	assert.NoError(t, err)

	sts.WorkSpec = one
	attempt = sts.RequestOneAttempt(t)
	err = attempt.Finish(map[string]interface{}{
		"output": []interface{}{
			"key",
			cborrpc.PythonTuple{Items: []interface{}{
				"key",
				map[string]interface{}{
					"data": "x",
				},
				map[string]interface{}{
					"priority": 10.0,
				},
			}},
		},
	})
	assert.NoError(t, err)

	units, err = two.WorkUnits(coordinate.WorkUnitQuery{})
	if assert.NoError(t, err) {
		if assert.Contains(t, units, "key") {
			DataMatches(t, units["key"], map[string]interface{}{"data": "x"})
			UnitHasPriority(t, units["key"], 10.0)
		}
	}
}
Beispiel #2
0
// UpdateWorkUnit causes some state change in a work unit.  If the
// work unit is pending, this is the principal interface to complete
// or renew it; if it is already complete this can cause it to be
// retried.
func (jobs *JobServer) UpdateWorkUnit(
	workSpecName string,
	workUnitKey string,
	options map[string]interface{},
) (bool, string, error) {
	// Note that in several corner cases, the behavior of this as
	// written disagrees with Python coordinated's:
	//
	// * If neither "lease_time" nor "status" is specified,
	//   Python coordinated immediately returns False without
	//   checking if workUnitKey is valid
	//
	// * Python coordinated allows arbitrary status changes,
	//   including AVAILABLE -> FINISHED
	//
	// * This openly ignores "worker_id", as distinct from Python
	//   coordinated, which logs an obscure warning and changes it,
	//   but only on a renew
	var (
		attempt    coordinate.Attempt
		changed    bool
		err        error
		status     coordinate.AttemptStatus
		uwuOptions UpdateWorkUnitOptions
		workSpec   coordinate.WorkSpec
		workUnit   coordinate.WorkUnit
	)
	err = decode(&uwuOptions, options)
	if err == nil {
		workSpec, err = jobs.Namespace.WorkSpec(workSpecName)
	}
	if err == nil {
		workUnit, err = workSpec.WorkUnit(workUnitKey)
	}
	if err == nil {
		if workUnit == nil {
			return false, fmt.Sprintf("no such work unit key=%v", workUnitKey), nil
		}
	}
	if err == nil {
		attempt, err = workUnit.ActiveAttempt()
	}
	if err == nil && attempt != nil {
		status, err = attempt.Status()
	}
	if err == nil && attempt != nil {
		if status == coordinate.Expired || status == coordinate.Retryable {
			// The Python Coordinate API sees both of these
			// statuses as "available", and we want to fall
			// into the next block.
			attempt = nil
		}
	}
	if err == nil && attempt == nil {
		// Caller is trying to manipulate an AVAILABLE work
		// unit.  Either they are trying to change the work
		// unit data in place, or they are trying to jump a
		// work unit directly to a completed state.  (The
		// latter is possible during the Python work unit
		// parent cleanup, if the timing is bad.)
		if uwuOptions.Status == Available || uwuOptions.Status == 0 {
			// The only thing we are doing is changing the
			// work unit data.
			if uwuOptions.Data != nil {
				meta, err := workUnit.Meta()
				if err == nil {
					_, err = workSpec.AddWorkUnit(workUnit.Name(), uwuOptions.Data, meta)
				}
				if err == nil {
					changed = true
				}
			}
			return changed && err == nil, "", err
		}
		// Otherwise we are trying to transition to another
		// state; so force-create an attempt.
		worker, err := jobs.Namespace.Worker(uwuOptions.WorkerID)
		if err == nil {
			attempt, err = worker.MakeAttempt(workUnit, uwuOptions.LeaseDuration())
			status = coordinate.Pending
		}
	}
	if err == nil {
		switch status {
		case coordinate.Pending:
			changed = true // or there's an error
			switch uwuOptions.Status {
			case 0, Pending:
				err = attempt.Renew(uwuOptions.LeaseDuration(), uwuOptions.Data)
			case Available:
				err = attempt.Expire(uwuOptions.Data)
			case Finished:
				err = attempt.Finish(uwuOptions.Data)
			case Failed:
				err = attempt.Fail(uwuOptions.Data)
			default:
				err = errors.New("update_work_unit invalid status")
			}
		case coordinate.Expired:
			err = errors.New("update_work_unit logic error, trying to refresh expired unit")
		case coordinate.Finished:
			switch uwuOptions.Status {
			case 0, Finished:
				changed = false // no-op
			case Available:
				err = workUnit.ClearActiveAttempt()
				changed = true
			case Failed:
				changed = false // see below
			default:
				err = errors.New("update_work_unit cannot change finished unit")
			}
		case coordinate.Failed:
			switch uwuOptions.Status {
			case 0, Failed:
				changed = false // no-op
			case Available: // "retry"
				err = workUnit.ClearActiveAttempt()
				changed = true
			case Finished:
				// The Python worker, with two separate
				// processes, has a race wherein there
				// could be 15 seconds to go, the parent
				// kills off the child, and the child
				// finishes successfully, all at the same
				// time.  In that case the successful
				// finish should win.
				err = attempt.Finish(nil)
				changed = true
			default:
				err = errors.New("update_work_unit cannot change failed unit")
			}
		case coordinate.Retryable:
			err = errors.New("update_work_unit logic error, trying to refresh retryable unit")
		default:
			err = fmt.Errorf("update_work_unit invalid attempt status %+v", status)
		}
	}
	return changed && err == nil, "", err
}
Beispiel #3
0
// TestChainingDuplicate tests that work unit chaining still works
// even when the same output work unit is generated twice (it should
// get retried).
func TestChainingDuplicate(t *testing.T) {
	var (
		err      error
		one, two coordinate.WorkSpec
		attempt  coordinate.Attempt
	)

	sts := SimpleTestSetup{
		NamespaceName: "TestChainingDuplicate",
		WorkerName:    "worker",
	}
	sts.SetUp(t)
	defer sts.TearDown(t)

	one, err = sts.Namespace.SetWorkSpec(map[string]interface{}{
		"name":     "one",
		"then":     "two",
		"priority": 1,
	})
	if !assert.NoError(t, err) {
		return
	}

	two, err = sts.Namespace.SetWorkSpec(map[string]interface{}{
		"name":     "two",
		"priority": 2,
	})
	if !assert.NoError(t, err) {
		return
	}

	_, err = one.AddWorkUnit("a", map[string]interface{}{}, coordinate.WorkUnitMeta{})
	assert.NoError(t, err)

	_, err = one.AddWorkUnit("b", map[string]interface{}{}, coordinate.WorkUnitMeta{})
	assert.NoError(t, err)

	sts.WorkSpec = one
	attempt = sts.RequestOneAttempt(t)
	assert.Equal(t, "a", attempt.WorkUnit().Name())

	err = attempt.Finish(map[string]interface{}{
		"output": []string{"z"},
	})
	assert.NoError(t, err)

	sts.WorkSpec = two
	attempt = sts.RequestOneAttempt(t)
	assert.Equal(t, "z", attempt.WorkUnit().Name())

	err = attempt.Finish(map[string]interface{}{})
	assert.NoError(t, err)

	sts.WorkSpec = one
	attempt = sts.RequestOneAttempt(t)
	assert.Equal(t, "b", attempt.WorkUnit().Name())

	err = attempt.Finish(map[string]interface{}{
		"output": []string{"z"},
	})
	assert.NoError(t, err)

	sts.WorkSpec = two
	attempt = sts.RequestOneAttempt(t)
	assert.Equal(t, "z", attempt.WorkUnit().Name())

	err = attempt.Finish(map[string]interface{}{})
	assert.NoError(t, err)

	sts.RequestNoAttempts(t)
}
Beispiel #4
0
// TestChainingTwoStep separately renews an attempt to insert an output
// key, then finishes the work unit; it should still chain.
func TestChainingTwoStep(t *testing.T) {
	var (
		one, two coordinate.WorkSpec
		attempt  coordinate.Attempt
		units    map[string]coordinate.WorkUnit
		unit     coordinate.WorkUnit
		err      error
	)

	sts := SimpleTestSetup{
		NamespaceName: "TestChainingTwoStep",
		WorkerName:    "worker",
	}
	sts.SetUp(t)
	defer sts.TearDown(t)

	one, err = sts.Namespace.SetWorkSpec(map[string]interface{}{
		"name": "one",
		"then": "two",
	})
	if !assert.NoError(t, err) {
		return
	}

	two, err = sts.Namespace.SetWorkSpec(map[string]interface{}{
		"name": "two",
	})
	if !assert.NoError(t, err) {
		return
	}

	_, err = one.AddWorkUnit("a", map[string]interface{}{}, coordinate.WorkUnitMeta{})
	assert.NoError(t, err)

	sts.WorkSpec = one
	attempt = sts.RequestOneAttempt(t)
	err = attempt.Renew(900*time.Second,
		map[string]interface{}{
			"output": []interface{}{
				[]byte{1, 2, 3, 4},
				cborrpc.PythonTuple{Items: []interface{}{
					[]byte{1, 2, 3, 4},
					map[interface{}]interface{}{},
					map[interface{}]interface{}{
						"priority": 0,
					},
				}},
			},
		})
	assert.NoError(t, err)

	err = attempt.Finish(nil)
	assert.NoError(t, err)

	units, err = two.WorkUnits(coordinate.WorkUnitQuery{})
	if assert.NoError(t, err) {
		if assert.Contains(t, units, "\x01\x02\x03\x04") {
			unit = units["\x01\x02\x03\x04"]
			DataEmpty(t, unit)
			UnitHasPriority(t, unit, 0.0)
		}
	}
}
Beispiel #5
0
// TestWorkUnitChaining tests that completing work units in one work spec
// will cause work units to appear in another, if so configured.
func TestWorkUnitChaining(t *testing.T) {
	var (
		err      error
		one, two coordinate.WorkSpec
		units    map[string]coordinate.WorkUnit
		attempt  coordinate.Attempt
	)

	sts := SimpleTestSetup{
		NamespaceName: "TestWorkUnitChaining",
		WorkerName:    "worker",
	}
	sts.SetUp(t)
	defer sts.TearDown(t)

	one, err = sts.Namespace.SetWorkSpec(map[string]interface{}{
		"name": "one",
		"then": "two",
	})
	if !assert.NoError(t, err) {
		return
	}
	// RequestAttempts always returns this
	sts.WorkSpec = one

	two, err = sts.Namespace.SetWorkSpec(map[string]interface{}{
		"name":     "two",
		"disabled": true,
	})
	if !assert.NoError(t, err) {
		return
	}

	// Create and perform a work unit, with no output
	_, err = one.AddWorkUnit("a", map[string]interface{}{}, coordinate.WorkUnitMeta{})
	assert.NoError(t, err)

	sts.WorkSpec = one
	attempt = sts.RequestOneAttempt(t)
	err = attempt.Finish(nil)
	assert.NoError(t, err)

	units, err = two.WorkUnits(coordinate.WorkUnitQuery{})
	if assert.NoError(t, err) {
		assert.Empty(t, units)
	}

	// Create and perform a work unit, with a map output
	_, err = one.AddWorkUnit("b", map[string]interface{}{}, coordinate.WorkUnitMeta{})
	assert.NoError(t, err)

	attempt = sts.RequestOneAttempt(t)
	err = attempt.Finish(map[string]interface{}{
		"output": map[string]interface{}{
			"two_b": map[string]interface{}{"k": "v"},
		},
	})
	assert.NoError(t, err)

	units, err = two.WorkUnits(coordinate.WorkUnitQuery{})
	if assert.NoError(t, err) {
		assert.Len(t, units, 1)
		if assert.Contains(t, units, "two_b") {
			DataMatches(t, units["two_b"], map[string]interface{}{"k": "v"})
		}
	}

	// Create and perform a work unit, with a slice output
	_, err = one.AddWorkUnit("c", map[string]interface{}{}, coordinate.WorkUnitMeta{})
	assert.NoError(t, err)

	attempt = sts.RequestOneAttempt(t)
	err = attempt.Finish(map[string]interface{}{
		"output": []string{"two_c", "two_cc"},
	})
	assert.NoError(t, err)

	units, err = two.WorkUnits(coordinate.WorkUnitQuery{})
	if assert.NoError(t, err) {
		assert.Len(t, units, 3)
		assert.Contains(t, units, "two_b")
		assert.Contains(t, units, "two_cc")
		if assert.Contains(t, units, "two_c") {
			DataEmpty(t, units["two_c"])
		}
	}

	// Put the output in the original work unit data
	_, err = one.AddWorkUnit("d", map[string]interface{}{
		"output": []string{"two_d"},
	}, coordinate.WorkUnitMeta{})
	assert.NoError(t, err)
	attempt = sts.RequestOneAttempt(t)
	err = attempt.Finish(nil)
	assert.NoError(t, err)

	units, err = two.WorkUnits(coordinate.WorkUnitQuery{})
	if assert.NoError(t, err) {
		assert.Len(t, units, 4)
		assert.Contains(t, units, "two_b")
		assert.Contains(t, units, "two_c")
		assert.Contains(t, units, "two_cc")
		assert.Contains(t, units, "two_d")
	}
}
Beispiel #6
0
// TestAttemptLifetime validates a basic attempt lifetime.
func TestAttemptLifetime(t *testing.T) {
	var (
		err               error
		data              map[string]interface{}
		attempt, attempt2 coordinate.Attempt
	)
	sts := SimpleTestSetup{
		NamespaceName: "TestAttemptLifetime",
		WorkerName:    "worker",
		WorkSpecName:  "spec",
		WorkUnitName:  "a",
	}
	sts.SetUp(t)
	defer sts.TearDown(t)

	// The work unit should be "available"
	sts.CheckUnitStatus(t, coordinate.AvailableUnit)

	// The work unit data should be defined but empty
	DataEmpty(t, sts.WorkUnit)

	// Get an attempt for it
	attempt = sts.RequestOneAttempt(t)

	// The work unit and attempt should both be "pending"
	sts.CheckUnitStatus(t, coordinate.PendingUnit)
	AttemptStatus(t, coordinate.Pending, attempt)

	// The active attempt for the unit should match this
	attempt2, err = sts.WorkUnit.ActiveAttempt()
	if assert.NoError(t, err) {
		AttemptMatches(t, attempt, attempt2)
	}

	// There should be one active attempt for the worker and it should
	// also match
	attempts, err := sts.Worker.ActiveAttempts()
	if assert.NoError(t, err) {
		if assert.Len(t, attempts, 1) {
			AttemptMatches(t, attempt, attempts[0])
		}
	}

	// The work unit data should (still) be defined but empty
	DataEmpty(t, sts.WorkUnit)

	// Now finish the attempt with some updated data
	err = attempt.Finish(map[string]interface{}{
		"outputs": []string{"yes"},
	})
	assert.NoError(t, err)

	// The unit and should report "finished"
	sts.CheckUnitStatus(t, coordinate.FinishedUnit)
	AttemptStatus(t, coordinate.Finished, attempt)

	// The attempt should still be the active attempt for the unit
	attempt2, err = sts.WorkUnit.ActiveAttempt()
	if assert.NoError(t, err) {
		AttemptMatches(t, attempt, attempt2)
	}

	// The attempt should not be in the active attempt list for the worker
	attempts, err = sts.Worker.ActiveAttempts()
	if assert.NoError(t, err) {
		assert.Empty(t, attempts)
	}

	// Both the unit and the worker should have one archived attempt
	attempts, err = sts.WorkUnit.Attempts()
	if assert.NoError(t, err) {
		if assert.Len(t, attempts, 1) {
			AttemptMatches(t, attempt, attempts[0])
		}
	}

	attempts, err = sts.Worker.AllAttempts()
	if assert.NoError(t, err) {
		if assert.Len(t, attempts, 1) {
			AttemptMatches(t, attempt, attempts[0])
		}
	}

	// This should have updated the visible work unit data too
	data, err = sts.WorkUnit.Data()
	if assert.NoError(t, err) {
		assert.Len(t, data, 1)
		if assert.Contains(t, data, "outputs") {
			if assert.Len(t, data["outputs"], 1) {
				assert.Equal(t, "yes", reflect.ValueOf(data["outputs"]).Index(0).Interface())
			}
		}
	}

	// For bonus points, force-clear the active attempt
	err = sts.WorkUnit.ClearActiveAttempt()
	assert.NoError(t, err)

	// This should have pushed the unit back to available
	sts.CheckUnitStatus(t, coordinate.AvailableUnit)

	// This also should have reset the work unit data
	DataEmpty(t, sts.WorkUnit)

	// But, this should not have reset the historical attempts
	attempts, err = sts.WorkUnit.Attempts()
	if assert.NoError(t, err) {
		if assert.Len(t, attempts, 1) {
			AttemptMatches(t, attempt, attempts[0])
		}
	}

	attempts, err = sts.Worker.AllAttempts()
	if assert.NoError(t, err) {
		if assert.Len(t, attempts, 1) {
			AttemptMatches(t, attempt, attempts[0])
		}
	}
}