Example #1
0
// workUnitStatus summarizes the state of one work unit.  On success
// it returns the externally visible status together with the active
// attempt, if the unit still has one that counts as "in progress or
// done"; expired and retryable attempts are reported as an available
// unit with no attempt.
//
// If looking up the active attempt or its status fails, or the
// attempt reports a status this function does not know about, a
// non-nil error is returned and the status is left at its zero value.
func workUnitStatus(workUnit coordinate.WorkUnit) (status WorkUnitStatus, attempt coordinate.Attempt, err error) {
	attempt, err = workUnit.ActiveAttempt()
	if err != nil {
		return status, attempt, err
	}
	if attempt == nil {
		// No active attempt at all; this also covers work units
		// that are merely "delayed".
		return Available, nil, nil
	}

	var state coordinate.AttemptStatus
	state, err = attempt.Status()
	if err != nil {
		return status, attempt, err
	}

	switch state {
	case coordinate.Pending:
		status = Pending
	case coordinate.Finished:
		status = Finished
	case coordinate.Failed:
		status = Failed
	case coordinate.Expired, coordinate.Retryable:
		// Both of these mean the unit can be handed out again,
		// so hide the stale attempt from the caller.
		status = Available
		attempt = nil
	default:
		err = errors.New("unexpected attempt status")
	}
	return status, attempt, err
}
Example #2
0
// WorkUnitsPost creates a work unit in the work spec carried by ctx
// from the restdata.WorkUnit passed as in.  On success it returns a
// responseCreated whose Location is the URL of the new unit and whose
// Body is the unit's short representation.  If in is not a
// restdata.WorkUnit, errUnmarshal is returned; any error from adding
// the unit or building its short form is returned unchanged.
func (api *restAPI) WorkUnitsPost(ctx *context, in interface{}) (interface{}, error) {
	repr, valid := in.(restdata.WorkUnit)
	if !valid {
		return nil, errUnmarshal
	}

	// Metadata is optional in the request; fall back to the zero
	// value when the caller did not send any.
	var meta coordinate.WorkUnitMeta
	if repr.Meta != nil {
		meta = *repr.Meta
	}

	unit, err := ctx.WorkSpec.AddWorkUnit(repr.Name, repr.Data, meta)
	if err != nil {
		return nil, err
	}

	var short restdata.WorkUnitShort
	err = api.fillWorkUnitShort(ctx.Namespace, ctx.WorkSpec, unit.Name(), &short)
	if err != nil {
		return nil, err
	}

	return responseCreated{
		Location: short.URL,
		Body:     short,
	}, nil
}
Example #3
0
// TestChainingExpiry tests that, if an attempt finishes but is no
// longer the active attempt, then its successor work units will not
// be created.
func TestChainingExpiry(t *testing.T) {
	var (
		one, two coordinate.WorkSpec
		err      error
		unit     coordinate.WorkUnit
	)

	sts := SimpleTestSetup{
		NamespaceName: "TestChainingExpiry",
		WorkerName:    "worker",
	}
	sts.SetUp(t)
	defer sts.TearDown(t)

	one, err = sts.Namespace.SetWorkSpec(map[string]interface{}{
		"name": "one",
		"then": "two",
	})
	if !assert.NoError(t, err) {
		return
	}
	sts.WorkSpec = one

	two, err = sts.Namespace.SetWorkSpec(map[string]interface{}{
		"name":     "two",
		"disabled": true,
	})
	if !assert.NoError(t, err) {
		return
	}

	// Create and perform a work unit, with no output
	unit, err = one.AddWorkUnit("a", map[string]interface{}{}, coordinate.WorkUnitMeta{})
	if !assert.NoError(t, err) {
		return
	}

	attempt := sts.RequestOneAttempt(t)

	// But wait!  We got preempted
	err = unit.ClearActiveAttempt()
	assert.NoError(t, err)
	sts.RequestOneAttempt(t)

	// Now, let the original attempt finish, trying to generate
	// more outputs
	err = attempt.Finish(map[string]interface{}{
		"output": []string{"unit"},
	})
	assert.NoError(t, err)

	// Since attempt is no longer active, this shouldn't generate
	// new outputs
	units, err := two.WorkUnits(coordinate.WorkUnitQuery{})
	if assert.NoError(t, err) {
		assert.Empty(t, units)
	}
}
Example #4
0
// TestWorkUnitPrioritySet tests the different ways of changing a work
// unit's priority: directly on the unit (SetPriority), in bulk to an
// absolute value (SetWorkUnitPriorities), and in bulk by a relative
// amount (AdjustWorkUnitPriorities).  It finishes by checking that
// the resulting priorities determine the order in which the units are
// handed out.
func TestWorkUnitPrioritySet(t *testing.T) {
	var (
		err  error
		unit coordinate.WorkUnit
	)
	sts := SimpleTestSetup{
		NamespaceName: "TestWorkUnitPrioritySet",
		WorkerName:    "worker",
		WorkSpecName:  "spec",
	}
	sts.SetUp(t)
	defer sts.TearDown(t)

	// "a": a freshly created unit has priority 0.
	unit, err = sts.WorkSpec.AddWorkUnit("a", map[string]interface{}{}, coordinate.WorkUnitMeta{})
	if assert.NoError(t, err) {
		UnitHasPriority(t, unit, 0.0)
	}

	// "b": set the priority on the unit object itself.
	unit, err = sts.WorkSpec.AddWorkUnit("b", map[string]interface{}{}, coordinate.WorkUnitMeta{})
	if assert.NoError(t, err) {
		err = unit.SetPriority(10.0)
		if assert.NoError(t, err) {
			UnitHasPriority(t, unit, 10.0)
		}
	}

	// "c": set an absolute priority through a work spec query.
	// Only touch unit if it was actually created; otherwise
	// UnitHasPriority would be handed a nil work unit.
	unit, err = sts.WorkSpec.AddWorkUnit("c", map[string]interface{}{}, coordinate.WorkUnitMeta{})
	if assert.NoError(t, err) {
		err = sts.WorkSpec.SetWorkUnitPriorities(coordinate.WorkUnitQuery{
			Names: []string{"c"},
		}, 20.0)
		if assert.NoError(t, err) {
			UnitHasPriority(t, unit, 20.0)
		}
	}

	// "d": adjust the priority relatively, twice; the adjustments
	// accumulate (0 + 20 + 10).
	unit, err = sts.WorkSpec.AddWorkUnit("d", map[string]interface{}{}, coordinate.WorkUnitMeta{})
	if assert.NoError(t, err) {
		err = sts.WorkSpec.AdjustWorkUnitPriorities(coordinate.WorkUnitQuery{
			Names: []string{"d"},
		}, 20.0)
		if assert.NoError(t, err) {
			UnitHasPriority(t, unit, 20.0)
		}
		err = sts.WorkSpec.AdjustWorkUnitPriorities(coordinate.WorkUnitQuery{
			Names: []string{"d"},
		}, 10.0)
		if assert.NoError(t, err) {
			UnitHasPriority(t, unit, 30.0)
		}
	}

	// Re-fetch "b" to make sure the bulk operations on "c" and "d"
	// left it alone.  The lookup may yield a nil unit without an
	// error, so guard against that before dereferencing it.
	unit, err = sts.WorkSpec.WorkUnit("b")
	if assert.NoError(t, err) && assert.NotNil(t, unit) {
		UnitHasPriority(t, unit, 10.0)
	}

	// Highest priority first.
	sts.CheckWorkUnitOrder(t, "d", "c", "b", "a")
}
Example #5
0
// MakeAttempt asks the server to create an attempt by this worker on
// unit, with the requested lifetime, by posting to the worker's
// make-attempt URL.  It returns the new attempt, or the first error
// encountered while posting, parsing the returned attempt URL, or
// resolving the attempt's references.
func (w *worker) MakeAttempt(unit coordinate.WorkUnit, lifetime time.Duration) (coordinate.Attempt, error) {
	request := restdata.AttemptSpecific{
		WorkSpec: unit.WorkSpec().Name(),
		WorkUnit: unit.Name(),
		Lifetime: lifetime,
	}

	var (
		made attempt
		err  error
	)
	if err = w.PostTo(w.Representation.MakeAttemptURL, map[string]interface{}{}, request, &made.Representation); err != nil {
		return nil, err
	}

	// The URL in the response is resolved against the worker's
	// own URL.
	if made.URL, err = w.URL.Parse(made.Representation.URL); err != nil {
		return nil, err
	}

	// The failed-assertion case is deliberately not treated as an
	// error: if unit is some other WorkUnit implementation, a nil
	// *workUnit is handed to fillReferences.
	// NOTE(review): presumably fillReferences copes with a nil
	// unit by looking it up itself -- confirm.
	restUnit, _ := unit.(*workUnit)
	if err = made.fillReferences(restUnit, w); err != nil {
		return nil, err
	}

	return &made, nil
}
Example #6
0
// UnitHasPriority asserts that unit's priority can be read without
// error and equals priority.  Failures are reported on t; the
// equality check is skipped if reading the priority failed.
func UnitHasPriority(t *testing.T, unit coordinate.WorkUnit, priority float64) {
	actual, err := unit.Priority()
	if !assert.NoError(t, err) {
		return
	}
	assert.Equal(t, priority, actual)
}
Example #7
0
// TestByRuntime creates two work specs with different runtimes, and
// validates that requests that want a specific runtime get it.
func TestByRuntime(t *testing.T) {
	// The specific thing we'll simulate here is one Python
	// worker, using the jobserver interface, with an empty
	// runtime string, plus one Go worker, using the native API,
	// with a "go" runtime.
	var (
		err          error
		pSpec, gSpec coordinate.WorkSpec
		pUnit, gUnit coordinate.WorkUnit
		attempts     []coordinate.Attempt
	)

	sts := SimpleTestSetup{
		NamespaceName: "TestByRuntime",
		WorkerName:    "worker",
	}
	sts.SetUp(t)
	defer sts.TearDown(t)

	pSpec, err = sts.Namespace.SetWorkSpec(map[string]interface{}{
		"name": "p",
	})
	if !assert.NoError(t, err) {
		return
	}
	pUnit, err = pSpec.AddWorkUnit("p", map[string]interface{}{}, coordinate.WorkUnitMeta{})
	if !assert.NoError(t, err) {
		return
	}

	gSpec, err = sts.Namespace.SetWorkSpec(map[string]interface{}{
		"name":    "g",
		"runtime": "go",
	})
	if !assert.NoError(t, err) {
		return
	}
	gUnit, err = gSpec.AddWorkUnit("g", map[string]interface{}{}, coordinate.WorkUnitMeta{})
	if !assert.NoError(t, err) {
		return
	}

	// If we use default settings for RequestAttempts, we should
	// get back both work units
	Clock.Add(5 * time.Second)
	attempts, err = sts.Worker.RequestAttempts(coordinate.AttemptRequest{})
	if assert.NoError(t, err) && assert.Len(t, attempts, 1) {
		err = attempts[0].Finish(map[string]interface{}{})
		assert.NoError(t, err)

		wasP := attempts[0].WorkUnit().Name() == "p"

		// Get more attempts
		Clock.Add(time.Duration(5) * time.Second)
		attempts, err = sts.Worker.RequestAttempts(coordinate.AttemptRequest{})
		if assert.NoError(t, err) && assert.Len(t, attempts, 1) {
			err = attempts[0].Finish(map[string]interface{}{})
			assert.NoError(t, err)

			// Should have gotten the other work spec
			if wasP {
				assert.Equal(t, "g", attempts[0].WorkUnit().Name())
			} else {
				assert.Equal(t, "p", attempts[0].WorkUnit().Name())
			}
		}

		// Now there shouldn't be anything more
		Clock.Add(5 * time.Second)
		sts.RequestNoAttempts(t)
	}

	// Reset the world
	err = pUnit.ClearActiveAttempt()
	assert.NoError(t, err)
	err = gUnit.ClearActiveAttempt()
	assert.NoError(t, err)

	// What we expect to get from jobserver
	Clock.Add(5 * time.Second)
	attempts, err = sts.Worker.RequestAttempts(coordinate.AttemptRequest{
		Runtimes: []string{""},
	})
	if assert.NoError(t, err) && assert.Len(t, attempts, 1) {
		assert.Equal(t, "p", attempts[0].WorkUnit().Name())
		err = attempts[0].Retry(map[string]interface{}{}, time.Duration(0))
		assert.NoError(t, err)
	}

	// A more sophisticated Python check
	Clock.Add(5 * time.Second)
	attempts, err = sts.Worker.RequestAttempts(coordinate.AttemptRequest{
		Runtimes: []string{"python", "python_2", "python_2.7", ""},
	})
	if assert.NoError(t, err) && assert.Len(t, attempts, 1) {
		assert.Equal(t, "p", attempts[0].WorkUnit().Name())
		err = attempts[0].Retry(map[string]interface{}{}, time.Duration(0))
		assert.NoError(t, err)
	}

	// What we expect to get from Go land
	Clock.Add(5 * time.Second)
	attempts, err = sts.Worker.RequestAttempts(coordinate.AttemptRequest{
		Runtimes: []string{"go"},
	})
	if assert.NoError(t, err) && assert.Len(t, attempts, 1) {
		assert.Equal(t, "g", attempts[0].WorkUnit().Name())
		err = attempts[0].Retry(map[string]interface{}{}, time.Duration(0))
		assert.NoError(t, err)
	}
}
Example #8
0
// UpdateWorkUnit causes some state change in a work unit.  If the
// work unit is pending, this is the principal interface to complete
// or renew it; if it is already complete this can cause it to be
// retried.
//
// workSpecName and workUnitKey identify the work unit.  options is
// decoded into an UpdateWorkUnitOptions; the fields consulted here are
// the target status, replacement data, lease duration, and worker ID.
//
// The first return value reports whether anything was changed; it is
// only ever true when the returned error is nil.  The second return
// value is a message for the caller, which is non-empty only when the
// work unit does not exist.  The third is any error from decoding the
// options, from the underlying coordinate calls, or from a requested
// transition that is not allowed.
func (jobs *JobServer) UpdateWorkUnit(
	workSpecName string,
	workUnitKey string,
	options map[string]interface{},
) (bool, string, error) {
	// Note that in several corner cases, the behavior of this as
	// written disagrees with Python coordinated's:
	//
	// * If neither "lease_time" nor "status" is specified,
	//   Python coordinated immediately returns False without
	//   checking if workUnitKey is valid
	//
	// * Python coordinated allows arbitrary status changes,
	//   including AVAILABLE -> FINISHED
	//
	// * This openly ignores "worker_id", as distinct from Python
	//   coordinated, which logs an obscure warning and changes it,
	//   but only on a renew
	var (
		attempt    coordinate.Attempt
		changed    bool
		err        error
		status     coordinate.AttemptStatus
		uwuOptions UpdateWorkUnitOptions
		workSpec   coordinate.WorkSpec
		workUnit   coordinate.WorkUnit
	)
	err = decode(&uwuOptions, options)
	if err == nil {
		workSpec, err = jobs.Namespace.WorkSpec(workSpecName)
	}
	if err == nil {
		workUnit, err = workSpec.WorkUnit(workUnitKey)
	}
	if err == nil {
		if workUnit == nil {
			return false, fmt.Sprintf("no such work unit key=%v", workUnitKey), nil
		}
	}
	if err == nil {
		attempt, err = workUnit.ActiveAttempt()
	}
	if err == nil && attempt != nil {
		status, err = attempt.Status()
	}
	if err == nil && attempt != nil {
		if status == coordinate.Expired || status == coordinate.Retryable {
			// The Python Coordinate API sees both of these
			// statuses as "available", and we want to fall
			// into the next block.
			attempt = nil
		}
	}
	if err == nil && attempt == nil {
		// Caller is trying to manipulate an AVAILABLE work
		// unit.  Either they are trying to change the work
		// unit data in place, or they are trying to jump a
		// work unit directly to a completed state.  (The
		// latter is possible during the Python work unit
		// parent cleanup, if the timing is bad.)
		if uwuOptions.Status == Available || uwuOptions.Status == 0 {
			// The only thing we are doing is changing the
			// work unit data.
			if uwuOptions.Data != nil {
				// Assign to the function-level err (no
				// ":=") so that a failure here is what
				// the return below reports.
				var meta coordinate.WorkUnitMeta
				meta, err = workUnit.Meta()
				if err == nil {
					_, err = workSpec.AddWorkUnit(workUnit.Name(), uwuOptions.Data, meta)
				}
				if err == nil {
					changed = true
				}
			}
			return changed && err == nil, "", err
		}
		// Otherwise we are trying to transition to another
		// state; so force-create an attempt.  The lookup error
		// is copied into the function-level err so that a
		// failure here (or in MakeAttempt) skips the status
		// switch below instead of reaching it with a nil
		// attempt.
		worker, workerErr := jobs.Namespace.Worker(uwuOptions.WorkerID)
		err = workerErr
		if err == nil {
			attempt, err = worker.MakeAttempt(workUnit, uwuOptions.LeaseDuration())
			status = coordinate.Pending
		}
	}
	if err == nil {
		// At this point attempt is non-nil and status is its
		// current status; dispatch on (current, requested).
		switch status {
		case coordinate.Pending:
			changed = true // or there's an error
			switch uwuOptions.Status {
			case 0, Pending:
				err = attempt.Renew(uwuOptions.LeaseDuration(), uwuOptions.Data)
			case Available:
				err = attempt.Expire(uwuOptions.Data)
			case Finished:
				err = attempt.Finish(uwuOptions.Data)
			case Failed:
				err = attempt.Fail(uwuOptions.Data)
			default:
				err = errors.New("update_work_unit invalid status")
			}
		case coordinate.Expired:
			err = errors.New("update_work_unit logic error, trying to refresh expired unit")
		case coordinate.Finished:
			switch uwuOptions.Status {
			case 0, Finished:
				changed = false // no-op
			case Available:
				err = workUnit.ClearActiveAttempt()
				changed = true
			case Failed:
				// Finished wins over a late failure; see
				// the race described under Failed ->
				// Finished below.
				changed = false
			default:
				err = errors.New("update_work_unit cannot change finished unit")
			}
		case coordinate.Failed:
			switch uwuOptions.Status {
			case 0, Failed:
				changed = false // no-op
			case Available: // "retry"
				err = workUnit.ClearActiveAttempt()
				changed = true
			case Finished:
				// The Python worker, with two separate
				// processes, has a race wherein there
				// could be 15 seconds to go, the parent
				// kills off the child, and the child
				// finishes successfully, all at the same
				// time.  In that case the successful
				// finish should win.
				err = attempt.Finish(nil)
				changed = true
			default:
				err = errors.New("update_work_unit cannot change failed unit")
			}
		case coordinate.Retryable:
			err = errors.New("update_work_unit logic error, trying to refresh retryable unit")
		default:
			err = fmt.Errorf("update_work_unit invalid attempt status %+v", status)
		}
	}
	return changed && err == nil, "", err
}
Example #9
0
// fillWorkUnit populates repr, the full REST representation of unit,
// which lives in spec within namespace.  It fills in the embedded
// short form, the unit's data, metadata and status, the URLs of the
// owning work spec and of the unit's attempts, and, when the unit has
// an active attempt, that attempt's URL.  The first error encountered
// is returned, and repr may be partially filled in that case.
func (api *restAPI) fillWorkUnit(namespace coordinate.Namespace, spec coordinate.WorkSpec, unit coordinate.WorkUnit, repr *restdata.WorkUnit) error {
	err := api.fillWorkUnitShort(namespace, spec, unit.Name(), &repr.WorkUnitShort)
	if err != nil {
		return err
	}

	if repr.Data, err = unit.Data(); err != nil {
		return err
	}

	// repr.Meta is pointed at the local copy whether or not the
	// lookup succeeded.
	var meta coordinate.WorkUnitMeta
	meta, err = unit.Meta()
	repr.Meta = &meta
	if err != nil {
		return err
	}

	if repr.Status, err = unit.Status(); err != nil {
		return err
	}

	// Build the related-resource URLs from the router; the builder
	// carries any failure in its Error field.
	urls := buildURLs(api.Router,
		"namespace", namespace.Name(),
		"spec", spec.Name(),
		"unit", unit.Name(),
	).
		URL(&repr.WorkSpecURL, "workSpec").
		URL(&repr.AttemptsURL, "workUnitAttempts")
	if err = urls.Error; err != nil {
		return err
	}

	active, err := unit.ActiveAttempt()
	if err != nil || active == nil {
		// Either the lookup failed, or there is simply no
		// active attempt to link to (err is nil then).
		return err
	}

	// Build the attempt's short form only to borrow its URL; this
	// is the easiest way to reuse the URL-generation code.
	var short restdata.AttemptShort
	if err = api.fillAttemptShort(namespace, active, &short); err != nil {
		return err
	}
	repr.ActiveAttemptURL = short.URL
	return nil
}