// workUnitStatus extracts a summary of the status of a single work // unit. This produces its external coordinate status and the active // attempt (if any) on success. func workUnitStatus(workUnit coordinate.WorkUnit) (status WorkUnitStatus, attempt coordinate.Attempt, err error) { var attemptStatus coordinate.AttemptStatus attempt, err = workUnit.ActiveAttempt() if err == nil && attempt == nil { // NB: this also includes "delayed" status status = Available return } if err == nil { attemptStatus, err = attempt.Status() } if err == nil { switch attemptStatus { case coordinate.Pending: status = Pending case coordinate.Expired: status = Available attempt = nil case coordinate.Finished: status = Finished case coordinate.Failed: status = Failed case coordinate.Retryable: status = Available attempt = nil default: err = errors.New("unexpected attempt status") } } return }
func (api *restAPI) WorkUnitsPost(ctx *context, in interface{}) (interface{}, error) { var ( err error unit coordinate.WorkUnit short restdata.WorkUnitShort ) repr, valid := in.(restdata.WorkUnit) if !valid { err = errUnmarshal } if err == nil { var meta coordinate.WorkUnitMeta if repr.Meta != nil { meta = *repr.Meta } unit, err = ctx.WorkSpec.AddWorkUnit(repr.Name, repr.Data, meta) } if err == nil { err = api.fillWorkUnitShort(ctx.Namespace, ctx.WorkSpec, unit.Name(), &short) } if err == nil { resp := responseCreated{ Location: short.URL, Body: short, } return resp, nil } return nil, err }
// TestChainingExpiry tests that, if an attempt finishes but is no // longer the active attempt, then its successor work units will not // be created. func TestChainingExpiry(t *testing.T) { var ( one, two coordinate.WorkSpec err error unit coordinate.WorkUnit ) sts := SimpleTestSetup{ NamespaceName: "TestChainingExpiry", WorkerName: "worker", } sts.SetUp(t) defer sts.TearDown(t) one, err = sts.Namespace.SetWorkSpec(map[string]interface{}{ "name": "one", "then": "two", }) if !assert.NoError(t, err) { return } sts.WorkSpec = one two, err = sts.Namespace.SetWorkSpec(map[string]interface{}{ "name": "two", "disabled": true, }) if !assert.NoError(t, err) { return } // Create and perform a work unit, with no output unit, err = one.AddWorkUnit("a", map[string]interface{}{}, coordinate.WorkUnitMeta{}) if !assert.NoError(t, err) { return } attempt := sts.RequestOneAttempt(t) // But wait! We got preempted err = unit.ClearActiveAttempt() assert.NoError(t, err) sts.RequestOneAttempt(t) // Now, let the original attempt finish, trying to generate // more outputs err = attempt.Finish(map[string]interface{}{ "output": []string{"unit"}, }) assert.NoError(t, err) // Since attempt is no longer active, this shouldn't generate // new outputs units, err := two.WorkUnits(coordinate.WorkUnitQuery{}) if assert.NoError(t, err) { assert.Empty(t, units) } }
// TestWorkUnitPrioritySet tests two different ways of setting work unit // priority. func TestWorkUnitPrioritySet(t *testing.T) { var ( err error unit coordinate.WorkUnit ) sts := SimpleTestSetup{ NamespaceName: "TestWorkUnitPrioritySet", WorkerName: "worker", WorkSpecName: "spec", } sts.SetUp(t) defer sts.TearDown(t) unit, err = sts.WorkSpec.AddWorkUnit("a", map[string]interface{}{}, coordinate.WorkUnitMeta{}) if assert.NoError(t, err) { UnitHasPriority(t, unit, 0.0) } unit, err = sts.WorkSpec.AddWorkUnit("b", map[string]interface{}{}, coordinate.WorkUnitMeta{}) if assert.NoError(t, err) { err = unit.SetPriority(10.0) if assert.NoError(t, err) { UnitHasPriority(t, unit, 10.0) } } unit, err = sts.WorkSpec.AddWorkUnit("c", map[string]interface{}{}, coordinate.WorkUnitMeta{}) assert.NoError(t, err) err = sts.WorkSpec.SetWorkUnitPriorities(coordinate.WorkUnitQuery{ Names: []string{"c"}, }, 20.0) if assert.NoError(t, err) { UnitHasPriority(t, unit, 20.0) } unit, err = sts.WorkSpec.AddWorkUnit("d", map[string]interface{}{}, coordinate.WorkUnitMeta{}) assert.NoError(t, err) err = sts.WorkSpec.AdjustWorkUnitPriorities(coordinate.WorkUnitQuery{ Names: []string{"d"}, }, 20.0) if assert.NoError(t, err) { UnitHasPriority(t, unit, 20.0) } err = sts.WorkSpec.AdjustWorkUnitPriorities(coordinate.WorkUnitQuery{ Names: []string{"d"}, }, 10.0) if assert.NoError(t, err) { UnitHasPriority(t, unit, 30.0) } unit, err = sts.WorkSpec.WorkUnit("b") if assert.NoError(t, err) { UnitHasPriority(t, unit, 10.0) } sts.CheckWorkUnitOrder(t, "d", "c", "b", "a") }
func (w *worker) MakeAttempt(unit coordinate.WorkUnit, lifetime time.Duration) (coordinate.Attempt, error) { req := restdata.AttemptSpecific{ WorkSpec: unit.WorkSpec().Name(), WorkUnit: unit.Name(), Lifetime: lifetime, } var a attempt err := w.PostTo(w.Representation.MakeAttemptURL, map[string]interface{}{}, req, &a.Representation) if err != nil { return nil, err } a.URL, err = w.URL.Parse(a.Representation.URL) if err != nil { return nil, err } aUnit, _ := unit.(*workUnit) err = a.fillReferences(aUnit, w) if err != nil { return nil, err } return &a, nil }
// UnitHasPriority validates the priority of a work unit. func UnitHasPriority(t *testing.T, unit coordinate.WorkUnit, priority float64) { actual, err := unit.Priority() if assert.NoError(t, err) { assert.Equal(t, priority, actual) } }
// TestByRuntime creates two work specs with different runtimes, and // validates that requests that want a specific runtime get it. func TestByRuntime(t *testing.T) { // The specific thing we'll simulate here is one Python // worker, using the jobserver interface, with an empty // runtime string, plus one Go worker, using the native API, // with a "go" runtime. var ( err error pSpec, gSpec coordinate.WorkSpec pUnit, gUnit coordinate.WorkUnit attempts []coordinate.Attempt ) sts := SimpleTestSetup{ NamespaceName: "TestByRuntime", WorkerName: "worker", } sts.SetUp(t) defer sts.TearDown(t) pSpec, err = sts.Namespace.SetWorkSpec(map[string]interface{}{ "name": "p", }) if !assert.NoError(t, err) { return } pUnit, err = pSpec.AddWorkUnit("p", map[string]interface{}{}, coordinate.WorkUnitMeta{}) if !assert.NoError(t, err) { return } gSpec, err = sts.Namespace.SetWorkSpec(map[string]interface{}{ "name": "g", "runtime": "go", }) if !assert.NoError(t, err) { return } gUnit, err = gSpec.AddWorkUnit("g", map[string]interface{}{}, coordinate.WorkUnitMeta{}) if !assert.NoError(t, err) { return } // If we use default settings for RequestAttempts, we should // get back both work units Clock.Add(5 * time.Second) attempts, err = sts.Worker.RequestAttempts(coordinate.AttemptRequest{}) if assert.NoError(t, err) && assert.Len(t, attempts, 1) { err = attempts[0].Finish(map[string]interface{}{}) assert.NoError(t, err) wasP := attempts[0].WorkUnit().Name() == "p" // Get more attempts Clock.Add(time.Duration(5) * time.Second) attempts, err = sts.Worker.RequestAttempts(coordinate.AttemptRequest{}) if assert.NoError(t, err) && assert.Len(t, attempts, 1) { err = attempts[0].Finish(map[string]interface{}{}) assert.NoError(t, err) // Should have gotten the other work spec if wasP { assert.Equal(t, "g", attempts[0].WorkUnit().Name()) } else { assert.Equal(t, "p", attempts[0].WorkUnit().Name()) } } // Now there shouldn't be anything more Clock.Add(5 * time.Second) sts.RequestNoAttempts(t) } // Reset the world err = pUnit.ClearActiveAttempt() assert.NoError(t, err) err = gUnit.ClearActiveAttempt() assert.NoError(t, err) // What we expect to get from jobserver Clock.Add(5 * time.Second) attempts, err = sts.Worker.RequestAttempts(coordinate.AttemptRequest{ Runtimes: []string{""}, }) if assert.NoError(t, err) && assert.Len(t, attempts, 1) { assert.Equal(t, "p", attempts[0].WorkUnit().Name()) err = attempts[0].Retry(map[string]interface{}{}, time.Duration(0)) assert.NoError(t, err) } // A more sophisticated Python check Clock.Add(5 * time.Second) attempts, err = sts.Worker.RequestAttempts(coordinate.AttemptRequest{ Runtimes: []string{"python", "python_2", "python_2.7", ""}, }) if assert.NoError(t, err) && assert.Len(t, attempts, 1) { assert.Equal(t, "p", attempts[0].WorkUnit().Name()) err = attempts[0].Retry(map[string]interface{}{}, time.Duration(0)) assert.NoError(t, err) } // What we expect to get from Go land Clock.Add(5 * time.Second) attempts, err = sts.Worker.RequestAttempts(coordinate.AttemptRequest{ Runtimes: []string{"go"}, }) if assert.NoError(t, err) && assert.Len(t, attempts, 1) { assert.Equal(t, "g", attempts[0].WorkUnit().Name()) err = attempts[0].Retry(map[string]interface{}{}, time.Duration(0)) assert.NoError(t, err) } }
// UpdateWorkUnit causes some state change in a work unit. If the // work unit is pending, this is the principal interface to complete // or renew it; if it is already complete this can cause it to be // retried. func (jobs *JobServer) UpdateWorkUnit( workSpecName string, workUnitKey string, options map[string]interface{}, ) (bool, string, error) { // Note that in several corner cases, the behavior of this as // written disagrees with Python coordinated's: // // * If neither "lease_time" nor "status" is specified, // Python coordinated immediately returns False without // checking if workUnitKey is valid // // * Python coordinated allows arbitrary status changes, // including AVAILABLE -> FINISHED // // * This openly ignores "worker_id", as distinct from Python // coordinated, which logs an obscure warning and changes it, // but only on a renew var ( attempt coordinate.Attempt changed bool err error status coordinate.AttemptStatus uwuOptions UpdateWorkUnitOptions workSpec coordinate.WorkSpec workUnit coordinate.WorkUnit ) err = decode(&uwuOptions, options) if err == nil { workSpec, err = jobs.Namespace.WorkSpec(workSpecName) } if err == nil { workUnit, err = workSpec.WorkUnit(workUnitKey) } if err == nil { if workUnit == nil { return false, fmt.Sprintf("no such work unit key=%v", workUnitKey), nil } } if err == nil { attempt, err = workUnit.ActiveAttempt() } if err == nil && attempt != nil { status, err = attempt.Status() } if err == nil && attempt != nil { if status == coordinate.Expired || status == coordinate.Retryable { // The Python Coordinate API sees both of these // statuses as "available", and we want to fall // into the next block. attempt = nil } } if err == nil && attempt == nil { // Caller is trying to manipulate an AVAILABLE work // unit. Either they are trying to change the work // unit data in place, or they are trying to jump a // work unit directly to a completed state. (The // latter is possible during the Python work unit // parent cleanup, if the timing is bad.) if uwuOptions.Status == Available || uwuOptions.Status == 0 { // The only thing we are doing is changing the // work unit data. if uwuOptions.Data != nil { meta, err := workUnit.Meta() if err == nil { _, err = workSpec.AddWorkUnit(workUnit.Name(), uwuOptions.Data, meta) } if err == nil { changed = true } } return changed && err == nil, "", err } // Otherwise we are trying to transition to another // state; so force-create an attempt. worker, err := jobs.Namespace.Worker(uwuOptions.WorkerID) if err == nil { attempt, err = worker.MakeAttempt(workUnit, uwuOptions.LeaseDuration()) status = coordinate.Pending } } if err == nil { switch status { case coordinate.Pending: changed = true // or there's an error switch uwuOptions.Status { case 0, Pending: err = attempt.Renew(uwuOptions.LeaseDuration(), uwuOptions.Data) case Available: err = attempt.Expire(uwuOptions.Data) case Finished: err = attempt.Finish(uwuOptions.Data) case Failed: err = attempt.Fail(uwuOptions.Data) default: err = errors.New("update_work_unit invalid status") } case coordinate.Expired: err = errors.New("update_work_unit logic error, trying to refresh expired unit") case coordinate.Finished: switch uwuOptions.Status { case 0, Finished: changed = false // no-op case Available: err = workUnit.ClearActiveAttempt() changed = true case Failed: changed = false // see below default: err = errors.New("update_work_unit cannot change finished unit") } case coordinate.Failed: switch uwuOptions.Status { case 0, Failed: changed = false // no-op case Available: // "retry" err = workUnit.ClearActiveAttempt() changed = true case Finished: // The Python worker, with two separate // processes, has a race wherein there // could be 15 seconds to go, the parent // kills off the child, and the child // finishes successfully, all at the same // time. In that case the successful // finish should win. err = attempt.Finish(nil) changed = true default: err = errors.New("update_work_unit cannot change failed unit") } case coordinate.Retryable: err = errors.New("update_work_unit logic error, trying to refresh retryable unit") default: err = fmt.Errorf("update_work_unit invalid attempt status %+v", status) } } return changed && err == nil, "", err }
func (api *restAPI) fillWorkUnit(namespace coordinate.Namespace, spec coordinate.WorkSpec, unit coordinate.WorkUnit, repr *restdata.WorkUnit) error { err := api.fillWorkUnitShort(namespace, spec, unit.Name(), &repr.WorkUnitShort) if err == nil { repr.Data, err = unit.Data() } if err == nil { var meta coordinate.WorkUnitMeta meta, err = unit.Meta() repr.Meta = &meta } if err == nil { repr.Status, err = unit.Status() } if err == nil { err = buildURLs(api.Router, "namespace", namespace.Name(), "spec", spec.Name(), "unit", unit.Name(), ). URL(&repr.WorkSpecURL, "workSpec"). URL(&repr.AttemptsURL, "workUnitAttempts"). Error } if err == nil { var attempt coordinate.Attempt attempt, err = unit.ActiveAttempt() if err == nil && attempt != nil { // This is cheating, a little, but it's probably // the easiest way to reuse this code var short restdata.AttemptShort err = api.fillAttemptShort(namespace, attempt, &short) if err == nil { repr.ActiveAttemptURL = short.URL } } } return err }