// TestChainingMixed uses a combination of strings and tuples in its // "output" data. func TestChainingMixed(t *testing.T) { var ( one, two coordinate.WorkSpec attempt coordinate.Attempt units map[string]coordinate.WorkUnit err error ) sts := SimpleTestSetup{ NamespaceName: "TestChainingMixed", WorkerName: "worker", } sts.SetUp(t) defer sts.TearDown(t) one, err = sts.Namespace.SetWorkSpec(map[string]interface{}{ "name": "one", "then": "two", }) if !assert.NoError(t, err) { return } two, err = sts.Namespace.SetWorkSpec(map[string]interface{}{ "name": "two", }) if !assert.NoError(t, err) { return } _, err = one.AddWorkUnit("a", map[string]interface{}{}, coordinate.WorkUnitMeta{}) assert.NoError(t, err) sts.WorkSpec = one attempt = sts.RequestOneAttempt(t) err = attempt.Finish(map[string]interface{}{ "output": []interface{}{ "key", cborrpc.PythonTuple{Items: []interface{}{ "key", map[string]interface{}{ "data": "x", }, map[string]interface{}{ "priority": 10.0, }, }}, }, }) assert.NoError(t, err) units, err = two.WorkUnits(coordinate.WorkUnitQuery{}) if assert.NoError(t, err) { if assert.Contains(t, units, "key") { DataMatches(t, units["key"], map[string]interface{}{"data": "x"}) UnitHasPriority(t, units["key"], 10.0) } } }
func (api *restAPI) fillAttemptShort(namespace coordinate.Namespace, attempt coordinate.Attempt, short *restdata.AttemptShort) error { var err error short.StartTime, err = attempt.StartTime() builder := api.attemptURLBuilder(namespace, attempt, short.StartTime, err) builder.URL(&short.URL, "attempt") builder.URL(&short.WorkUnitURL, "workUnit") builder.URL(&short.WorkerURL, "worker") return builder.Error }
func getWorkTuple(attempt coordinate.Attempt) (cborrpc.PythonTuple, error) { data, err := attempt.Data() if err != nil { return cborrpc.PythonTuple{}, err } workUnit := attempt.WorkUnit() return cborrpc.PythonTuple{Items: []interface{}{ workUnit.WorkSpec().Name(), []byte(workUnit.Name()), data, }}, nil }
// attemptMap turns a single attempt into the map returned by // GetChildWorkUnits(). func attemptMap(attempt coordinate.Attempt) (map[string]interface{}, error) { // First try to swap out attempt for its work unit's actual // active attempt. workUnit := attempt.WorkUnit() activeAttempt, err := workUnit.ActiveAttempt() if err != nil { return nil, err } if activeAttempt != nil { attempt = activeAttempt } // Collect extra data we need and build the result data, err := attempt.Data() if err != nil { return nil, err } expires, err := attempt.ExpirationTime() if err != nil { return nil, err } result := map[string]interface{}{ "work_spec_name": workUnit.WorkSpec().Name(), "work_unit_key": []byte(workUnit.Name()), "work_unit_data": data, "worker_id": attempt.Worker().Name(), "expires": expires.Unix(), } return result, nil }
func (api *restAPI) attemptURLBuilder(namespace coordinate.Namespace, attempt coordinate.Attempt, startTime time.Time, err error) *urlBuilder { unit := attempt.WorkUnit() spec := unit.WorkSpec() worker := attempt.Worker() if err == nil { return buildURLs(api.Router, "namespace", namespace.Name(), "spec", spec.Name(), "unit", unit.Name(), "worker", worker.Name(), "start", startTime.Format(time.RFC3339), ) } return &urlBuilder{Error: err} }
func (api *restAPI) fillAttempt(namespace coordinate.Namespace, attempt coordinate.Attempt, repr *restdata.Attempt) error { err := api.fillAttemptShort(namespace, attempt, &repr.AttemptShort) if err == nil { repr.Status, err = attempt.Status() } if err == nil { repr.Data, err = attempt.Data() } if err == nil { repr.EndTime, err = attempt.EndTime() } if err == nil { repr.ExpirationTime, err = attempt.ExpirationTime() } builder := api.attemptURLBuilder(namespace, attempt, repr.StartTime, err) builder.URL(&repr.RenewURL, "attemptRenew") builder.URL(&repr.ExpireURL, "attemptExpire") builder.URL(&repr.FinishURL, "attemptFinish") builder.URL(&repr.FailURL, "attemptFail") builder.URL(&repr.RetryURL, "attemptRetry") return builder.Error }
// AttemptStatus checks that an attempt has an expected status. func AttemptStatus(t *testing.T, expected coordinate.AttemptStatus, attempt coordinate.Attempt) { actual, err := attempt.Status() if assert.NoError(t, err) { assert.Equal(t, expected, actual) } }
// AttemptMatches checks that two attempts are attempting the same thing. func AttemptMatches(t *testing.T, expected, actual coordinate.Attempt) bool { return (assert.Equal(t, expected.Worker().Name(), actual.Worker().Name()) && assert.Equal(t, expected.WorkUnit().Name(), actual.WorkUnit().Name()) && assert.Equal(t, expected.WorkUnit().WorkSpec().Name(), actual.WorkUnit().WorkSpec().Name())) }
// UpdateWorkUnit causes some state change in a work unit. If the // work unit is pending, this is the principal interface to complete // or renew it; if it is already complete this can cause it to be // retried. func (jobs *JobServer) UpdateWorkUnit( workSpecName string, workUnitKey string, options map[string]interface{}, ) (bool, string, error) { // Note that in several corner cases, the behavior of this as // written disagrees with Python coordinated's: // // * If neither "lease_time" nor "status" is specified, // Python coordinated immediately returns False without // checking if workUnitKey is valid // // * Python coordinated allows arbitrary status changes, // including AVAILABLE -> FINISHED // // * This openly ignores "worker_id", as distinct from Python // coordinated, which logs an obscure warning and changes it, // but only on a renew var ( attempt coordinate.Attempt changed bool err error status coordinate.AttemptStatus uwuOptions UpdateWorkUnitOptions workSpec coordinate.WorkSpec workUnit coordinate.WorkUnit ) err = decode(&uwuOptions, options) if err == nil { workSpec, err = jobs.Namespace.WorkSpec(workSpecName) } if err == nil { workUnit, err = workSpec.WorkUnit(workUnitKey) } if err == nil { if workUnit == nil { return false, fmt.Sprintf("no such work unit key=%v", workUnitKey), nil } } if err == nil { attempt, err = workUnit.ActiveAttempt() } if err == nil && attempt != nil { status, err = attempt.Status() } if err == nil && attempt != nil { if status == coordinate.Expired || status == coordinate.Retryable { // The Python Coordinate API sees both of these // statuses as "available", and we want to fall // into the next block. attempt = nil } } if err == nil && attempt == nil { // Caller is trying to manipulate an AVAILABLE work // unit. Either they are trying to change the work // unit data in place, or they are trying to jump a // work unit directly to a completed state. (The // latter is possible during the Python work unit // parent cleanup, if the timing is bad.) if uwuOptions.Status == Available || uwuOptions.Status == 0 { // The only thing we are doing is changing the // work unit data. if uwuOptions.Data != nil { meta, err := workUnit.Meta() if err == nil { _, err = workSpec.AddWorkUnit(workUnit.Name(), uwuOptions.Data, meta) } if err == nil { changed = true } } return changed && err == nil, "", err } // Otherwise we are trying to transition to another // state; so force-create an attempt. worker, err := jobs.Namespace.Worker(uwuOptions.WorkerID) if err == nil { attempt, err = worker.MakeAttempt(workUnit, uwuOptions.LeaseDuration()) status = coordinate.Pending } } if err == nil { switch status { case coordinate.Pending: changed = true // or there's an error switch uwuOptions.Status { case 0, Pending: err = attempt.Renew(uwuOptions.LeaseDuration(), uwuOptions.Data) case Available: err = attempt.Expire(uwuOptions.Data) case Finished: err = attempt.Finish(uwuOptions.Data) case Failed: err = attempt.Fail(uwuOptions.Data) default: err = errors.New("update_work_unit invalid status") } case coordinate.Expired: err = errors.New("update_work_unit logic error, trying to refresh expired unit") case coordinate.Finished: switch uwuOptions.Status { case 0, Finished: changed = false // no-op case Available: err = workUnit.ClearActiveAttempt() changed = true case Failed: changed = false // see below default: err = errors.New("update_work_unit cannot change finished unit") } case coordinate.Failed: switch uwuOptions.Status { case 0, Failed: changed = false // no-op case Available: // "retry" err = workUnit.ClearActiveAttempt() changed = true case Finished: // The Python worker, with two separate // processes, has a race wherein there // could be 15 seconds to go, the parent // kills off the child, and the child // finishes successfully, all at the same // time. In that case the successful // finish should win. err = attempt.Finish(nil) changed = true default: err = errors.New("update_work_unit cannot change failed unit") } case coordinate.Retryable: err = errors.New("update_work_unit logic error, trying to refresh retryable unit") default: err = fmt.Errorf("update_work_unit invalid attempt status %+v", status) } } return changed && err == nil, "", err }
// TestChainingDuplicate tests that work unit chaining still works // even when the same output work unit is generated twice (it should // get retried). func TestChainingDuplicate(t *testing.T) { var ( err error one, two coordinate.WorkSpec attempt coordinate.Attempt ) sts := SimpleTestSetup{ NamespaceName: "TestChainingDuplicate", WorkerName: "worker", } sts.SetUp(t) defer sts.TearDown(t) one, err = sts.Namespace.SetWorkSpec(map[string]interface{}{ "name": "one", "then": "two", "priority": 1, }) if !assert.NoError(t, err) { return } two, err = sts.Namespace.SetWorkSpec(map[string]interface{}{ "name": "two", "priority": 2, }) if !assert.NoError(t, err) { return } _, err = one.AddWorkUnit("a", map[string]interface{}{}, coordinate.WorkUnitMeta{}) assert.NoError(t, err) _, err = one.AddWorkUnit("b", map[string]interface{}{}, coordinate.WorkUnitMeta{}) assert.NoError(t, err) sts.WorkSpec = one attempt = sts.RequestOneAttempt(t) assert.Equal(t, "a", attempt.WorkUnit().Name()) err = attempt.Finish(map[string]interface{}{ "output": []string{"z"}, }) assert.NoError(t, err) sts.WorkSpec = two attempt = sts.RequestOneAttempt(t) assert.Equal(t, "z", attempt.WorkUnit().Name()) err = attempt.Finish(map[string]interface{}{}) assert.NoError(t, err) sts.WorkSpec = one attempt = sts.RequestOneAttempt(t) assert.Equal(t, "b", attempt.WorkUnit().Name()) err = attempt.Finish(map[string]interface{}{ "output": []string{"z"}, }) assert.NoError(t, err) sts.WorkSpec = two attempt = sts.RequestOneAttempt(t) assert.Equal(t, "z", attempt.WorkUnit().Name()) err = attempt.Finish(map[string]interface{}{}) assert.NoError(t, err) sts.RequestNoAttempts(t) }
// TestChainingTwoStep separately renews an attempt to insert an output // key, then finishes the work unit; it should still chain. func TestChainingTwoStep(t *testing.T) { var ( one, two coordinate.WorkSpec attempt coordinate.Attempt units map[string]coordinate.WorkUnit unit coordinate.WorkUnit err error ) sts := SimpleTestSetup{ NamespaceName: "TestChainingTwoStep", WorkerName: "worker", } sts.SetUp(t) defer sts.TearDown(t) one, err = sts.Namespace.SetWorkSpec(map[string]interface{}{ "name": "one", "then": "two", }) if !assert.NoError(t, err) { return } two, err = sts.Namespace.SetWorkSpec(map[string]interface{}{ "name": "two", }) if !assert.NoError(t, err) { return } _, err = one.AddWorkUnit("a", map[string]interface{}{}, coordinate.WorkUnitMeta{}) assert.NoError(t, err) sts.WorkSpec = one attempt = sts.RequestOneAttempt(t) err = attempt.Renew(900*time.Second, map[string]interface{}{ "output": []interface{}{ []byte{1, 2, 3, 4}, cborrpc.PythonTuple{Items: []interface{}{ []byte{1, 2, 3, 4}, map[interface{}]interface{}{}, map[interface{}]interface{}{ "priority": 0, }, }}, }, }) assert.NoError(t, err) err = attempt.Finish(nil) assert.NoError(t, err) units, err = two.WorkUnits(coordinate.WorkUnitQuery{}) if assert.NoError(t, err) { if assert.Contains(t, units, "\x01\x02\x03\x04") { unit = units["\x01\x02\x03\x04"] DataEmpty(t, unit) UnitHasPriority(t, unit, 0.0) } } }
// TestWorkUnitChaining tests that completing work units in one work spec // will cause work units to appear in another, if so configured. func TestWorkUnitChaining(t *testing.T) { var ( err error one, two coordinate.WorkSpec units map[string]coordinate.WorkUnit attempt coordinate.Attempt ) sts := SimpleTestSetup{ NamespaceName: "TestWorkUnitChaining", WorkerName: "worker", } sts.SetUp(t) defer sts.TearDown(t) one, err = sts.Namespace.SetWorkSpec(map[string]interface{}{ "name": "one", "then": "two", }) if !assert.NoError(t, err) { return } // RequestAttempts always returns this sts.WorkSpec = one two, err = sts.Namespace.SetWorkSpec(map[string]interface{}{ "name": "two", "disabled": true, }) if !assert.NoError(t, err) { return } // Create and perform a work unit, with no output _, err = one.AddWorkUnit("a", map[string]interface{}{}, coordinate.WorkUnitMeta{}) assert.NoError(t, err) sts.WorkSpec = one attempt = sts.RequestOneAttempt(t) err = attempt.Finish(nil) assert.NoError(t, err) units, err = two.WorkUnits(coordinate.WorkUnitQuery{}) if assert.NoError(t, err) { assert.Empty(t, units) } // Create and perform a work unit, with a map output _, err = one.AddWorkUnit("b", map[string]interface{}{}, coordinate.WorkUnitMeta{}) assert.NoError(t, err) attempt = sts.RequestOneAttempt(t) err = attempt.Finish(map[string]interface{}{ "output": map[string]interface{}{ "two_b": map[string]interface{}{"k": "v"}, }, }) assert.NoError(t, err) units, err = two.WorkUnits(coordinate.WorkUnitQuery{}) if assert.NoError(t, err) { assert.Len(t, units, 1) if assert.Contains(t, units, "two_b") { DataMatches(t, units["two_b"], map[string]interface{}{"k": "v"}) } } // Create and perform a work unit, with a slice output _, err = one.AddWorkUnit("c", map[string]interface{}{}, coordinate.WorkUnitMeta{}) assert.NoError(t, err) attempt = sts.RequestOneAttempt(t) err = attempt.Finish(map[string]interface{}{ "output": []string{"two_c", "two_cc"}, }) assert.NoError(t, err) units, err = two.WorkUnits(coordinate.WorkUnitQuery{}) if assert.NoError(t, err) { assert.Len(t, units, 3) assert.Contains(t, units, "two_b") assert.Contains(t, units, "two_cc") if assert.Contains(t, units, "two_c") { DataEmpty(t, units["two_c"]) } } // Put the output in the original work unit data _, err = one.AddWorkUnit("d", map[string]interface{}{ "output": []string{"two_d"}, }, coordinate.WorkUnitMeta{}) assert.NoError(t, err) attempt = sts.RequestOneAttempt(t) err = attempt.Finish(nil) assert.NoError(t, err) units, err = two.WorkUnits(coordinate.WorkUnitQuery{}) if assert.NoError(t, err) { assert.Len(t, units, 4) assert.Contains(t, units, "two_b") assert.Contains(t, units, "two_c") assert.Contains(t, units, "two_cc") assert.Contains(t, units, "two_d") } }
// TestAttemptLifetime validates a basic attempt lifetime. func TestAttemptLifetime(t *testing.T) { var ( err error data map[string]interface{} attempt, attempt2 coordinate.Attempt ) sts := SimpleTestSetup{ NamespaceName: "TestAttemptLifetime", WorkerName: "worker", WorkSpecName: "spec", WorkUnitName: "a", } sts.SetUp(t) defer sts.TearDown(t) // The work unit should be "available" sts.CheckUnitStatus(t, coordinate.AvailableUnit) // The work unit data should be defined but empty DataEmpty(t, sts.WorkUnit) // Get an attempt for it attempt = sts.RequestOneAttempt(t) // The work unit and attempt should both be "pending" sts.CheckUnitStatus(t, coordinate.PendingUnit) AttemptStatus(t, coordinate.Pending, attempt) // The active attempt for the unit should match this attempt2, err = sts.WorkUnit.ActiveAttempt() if assert.NoError(t, err) { AttemptMatches(t, attempt, attempt2) } // There should be one active attempt for the worker and it should // also match attempts, err := sts.Worker.ActiveAttempts() if assert.NoError(t, err) { if assert.Len(t, attempts, 1) { AttemptMatches(t, attempt, attempts[0]) } } // The work unit data should (still) be defined but empty DataEmpty(t, sts.WorkUnit) // Now finish the attempt with some updated data err = attempt.Finish(map[string]interface{}{ "outputs": []string{"yes"}, }) assert.NoError(t, err) // The unit and should report "finished" sts.CheckUnitStatus(t, coordinate.FinishedUnit) AttemptStatus(t, coordinate.Finished, attempt) // The attempt should still be the active attempt for the unit attempt2, err = sts.WorkUnit.ActiveAttempt() if assert.NoError(t, err) { AttemptMatches(t, attempt, attempt2) } // The attempt should not be in the active attempt list for the worker attempts, err = sts.Worker.ActiveAttempts() if assert.NoError(t, err) { assert.Empty(t, attempts) } // Both the unit and the worker should have one archived attempt attempts, err = sts.WorkUnit.Attempts() if assert.NoError(t, err) { if assert.Len(t, attempts, 1) { AttemptMatches(t, attempt, attempts[0]) } } attempts, err = sts.Worker.AllAttempts() if assert.NoError(t, err) { if assert.Len(t, attempts, 1) { AttemptMatches(t, attempt, attempts[0]) } } // This should have updated the visible work unit data too data, err = sts.WorkUnit.Data() if assert.NoError(t, err) { assert.Len(t, data, 1) if assert.Contains(t, data, "outputs") { if assert.Len(t, data["outputs"], 1) { assert.Equal(t, "yes", reflect.ValueOf(data["outputs"]).Index(0).Interface()) } } } // For bonus points, force-clear the active attempt err = sts.WorkUnit.ClearActiveAttempt() assert.NoError(t, err) // This should have pushed the unit back to available sts.CheckUnitStatus(t, coordinate.AvailableUnit) // This also should have reset the work unit data DataEmpty(t, sts.WorkUnit) // But, this should not have reset the historical attempts attempts, err = sts.WorkUnit.Attempts() if assert.NoError(t, err) { if assert.Len(t, attempts, 1) { AttemptMatches(t, attempt, attempts[0]) } } attempts, err = sts.Worker.AllAttempts() if assert.NoError(t, err) { if assert.Len(t, attempts, 1) { AttemptMatches(t, attempt, attempts[0]) } } }