Exemple #1
0
// workUnitStatus extracts a summary of the status of a single work
// unit.  This produces its external coordinate status and the active
// attempt (if any) on success.
func workUnitStatus(workUnit coordinate.WorkUnit) (status WorkUnitStatus, attempt coordinate.Attempt, err error) {
	var attemptStatus coordinate.AttemptStatus
	attempt, err = workUnit.ActiveAttempt()
	if err == nil && attempt == nil {
		status = Available
		return
	}
	if err == nil {
		attemptStatus, err = attempt.Status()
	}
	if err == nil {
		switch attemptStatus {
		case coordinate.Pending:
			status = Pending
		case coordinate.Expired:
			status = Available
			attempt = nil
		case coordinate.Finished:
			status = Finished
		case coordinate.Failed:
			status = Failed
		case coordinate.Retryable:
			status = Available
			attempt = nil
		default:
			err = errors.New("unexpected attempt status")
		}
	}
	return
}
Exemple #2
0
// TestChainingExpiry tests that, if an attempt finishes but is no
// longer the active attempt, then its successor work units will not
// be created.
func (s *Suite) TestChainingExpiry(c *check.C) {
	var (
		one, two coordinate.WorkSpec
		err      error
		worker   coordinate.Worker
		unit     coordinate.WorkUnit
		attempts []coordinate.Attempt
	)

	one, err = s.Namespace.SetWorkSpec(map[string]interface{}{
		"name": "one",
		"then": "two",
	})
	c.Assert(err, check.IsNil)

	two, err = s.Namespace.SetWorkSpec(map[string]interface{}{
		"name":     "two",
		"disabled": true,
	})
	c.Assert(err, check.IsNil)

	worker, err = s.Namespace.Worker("worker")
	c.Assert(err, check.IsNil)

	// Create and perform a work unit, with no output
	unit, err = one.AddWorkUnit("a", map[string]interface{}{}, 0.0)
	c.Assert(err, check.IsNil)

	attempts, err = worker.RequestAttempts(coordinate.AttemptRequest{})
	c.Assert(err, check.IsNil)
	c.Assert(attempts, check.HasLen, 1)
	attempt := attempts[0]

	// But wait!  We got preempted
	err = unit.ClearActiveAttempt()
	c.Assert(err, check.IsNil)
	attempts, err = worker.RequestAttempts(coordinate.AttemptRequest{})
	c.Assert(err, check.IsNil)
	c.Assert(attempts, check.HasLen, 1)

	// Now, let the original attempt finish, trying to generate
	// more outputs
	err = attempt.Finish(map[string]interface{}{
		"output": []string{"unit"},
	})
	c.Assert(err, check.IsNil)

	// Since attempt is no longer active, this shouldn't generate
	// new outputs
	units, err := two.WorkUnits(coordinate.WorkUnitQuery{})
	c.Assert(err, check.IsNil)
	c.Check(units, check.HasLen, 0)
}
Exemple #3
0
// TestWorkUnitData validates that the system can store and update
// data.
func (s *Suite) TestWorkUnitData(c *check.C) {
	var (
		data map[string]interface{}
		unit coordinate.WorkUnit
	)
	spec, err := s.Namespace.SetWorkSpec(map[string]interface{}{
		"name":   "spec",
		"min_gb": 1,
	})
	c.Assert(err, check.IsNil)

	_, err = spec.AddWorkUnit("a", map[string]interface{}{
		"name":  "a",
		"value": 1,
	}, 0.0)
	c.Assert(err, check.IsNil)

	_, err = spec.AddWorkUnit("b", map[string]interface{}{
		"name":  "b",
		"value": 2,
	}, 0.0)
	c.Assert(err, check.IsNil)

	unit, err = spec.WorkUnit("a")
	c.Assert(err, check.IsNil)
	data, err = unit.Data()
	c.Assert(err, check.IsNil)
	c.Check(data, check.HasLen, 2)
	c.Check(data["name"], check.Equals, "a")
	c.Check(data["value"], Like, 1)

	unit, err = spec.WorkUnit("b")
	c.Assert(err, check.IsNil)
	data, err = unit.Data()
	c.Assert(err, check.IsNil)
	c.Check(data, check.HasLen, 2)
	c.Check(data["name"], check.Equals, "b")
	c.Check(data["value"], Like, 2)
}
Exemple #4
0
// TestTrivialWorkUnitFlow tests work unit creation, deletion, and existence.
func (s *Suite) TestTrivialWorkUnitFlow(c *check.C) {
	var (
		count int
		err   error
		spec  coordinate.WorkSpec
		unit  coordinate.WorkUnit
		units map[string]coordinate.WorkUnit
	)

	spec, err = s.Namespace.SetWorkSpec(map[string]interface{}{
		"name":   "spec",
		"min_gb": 1,
	})
	c.Assert(err, check.IsNil)

	unit, err = spec.AddWorkUnit("unit", map[string]interface{}{}, 0)
	c.Assert(err, check.IsNil)
	c.Check(unit.Name(), check.Equals, "unit")
	c.Check(unit.WorkSpec().Name(), check.Equals, "spec")

	unit, err = spec.WorkUnit("unit")
	c.Assert(err, check.IsNil)
	c.Check(unit.Name(), check.Equals, "unit")
	c.Check(unit.WorkSpec().Name(), check.Equals, "spec")

	units, err = spec.WorkUnits(coordinate.WorkUnitQuery{})
	c.Assert(err, check.IsNil)
	c.Check(units, check.HasLen, 1)
	c.Check(units["unit"], check.NotNil)
	c.Check(units["unit"].Name(), check.Equals, "unit")
	c.Check(units["unit"].WorkSpec().Name(), check.Equals, "spec")

	count, err = spec.DeleteWorkUnits(coordinate.WorkUnitQuery{})
	c.Assert(err, check.IsNil)
	c.Check(count, check.Equals, 1)

	unit, err = spec.WorkUnit("unit")
	c.Assert(err, check.IsNil)
	c.Check(unit, check.IsNil)

	units, err = spec.WorkUnits(coordinate.WorkUnitQuery{})
	c.Assert(err, check.IsNil)
	c.Check(units, check.HasLen, 0)
}
Exemple #5
0
// UpdateWorkUnit causes some state change in a work unit.  If the
// work unit is pending, this is the principal interface to complete
// or renew it; if it is already complete this can cause it to be
// retried.
func (jobs *JobServer) UpdateWorkUnit(
	workSpecName string,
	workUnitKey string,
	options map[string]interface{},
) (bool, string, error) {
	// Note that in several corner cases, the behavior of this as
	// written disagrees with Python coordinated's:
	//
	// * If neither "lease_time" nor "status" is specified,
	//   Python coordinated immediately returns False without
	//   checking if workUnitKey is valid
	//
	// * Python coordinated allows arbitrary status changes,
	//   including AVAILABLE -> FINISHED
	//
	// * This openly ignores "worker_id", as distinct from Python
	//   coordinated, which logs an obscure warning and changes it,
	//   but only on a renew
	var (
		attempt    coordinate.Attempt
		changed    bool
		err        error
		status     coordinate.AttemptStatus
		uwuOptions UpdateWorkUnitOptions
		workSpec   coordinate.WorkSpec
		workUnit   coordinate.WorkUnit
	)
	err = decode(&uwuOptions, options)
	if err == nil {
		workSpec, err = jobs.Namespace.WorkSpec(workSpecName)
	}
	if err == nil {
		workUnit, err = workSpec.WorkUnit(workUnitKey)
	}
	if err == nil {
		if workUnit == nil {
			return false, fmt.Sprintf("no such work unit key=%v", workUnitKey), nil
		}
	}
	if err == nil {
		attempt, err = workUnit.ActiveAttempt()
	}
	if err == nil && attempt != nil {
		status, err = attempt.Status()
	}
	if err == nil && attempt != nil {
		if status == coordinate.Expired || status == coordinate.Retryable {
			// The Python Coordinate API sees both of these
			// statuses as "available", and we want to fall
			// into the next block.
			attempt = nil
		}
	}
	if err == nil && attempt == nil {
		// Caller is trying to manipulate an AVAILABLE work
		// unit.  Cowardly refuse to start a new attempt on
		// their behalf, or to update the persistent work unit
		// data this way.  (In theory there's no reason we
		// *couldn't* do either, though I'm not aware of any
		// callers that do; add_work_unit will replace
		// existing work units and is the more typical way to
		// refresh data.)
		err = errors.New("update_work_unit will not adjust an available work unit")
	}
	if err == nil {
		switch status {
		case coordinate.Pending:
			changed = true // or there's an error
			switch uwuOptions.Status {
			case 0, Pending:
				err = uwuRenew(attempt, uwuOptions)
			case Available:
				err = attempt.Expire(uwuOptions.Data)
			case Finished:
				err = attempt.Finish(uwuOptions.Data)
			case Failed:
				err = attempt.Fail(uwuOptions.Data)
			default:
				err = errors.New("update_work_unit invalid status")
			}
		case coordinate.Expired:
			err = errors.New("update_work_unit logic error, trying to refresh expired unit")
		case coordinate.Finished:
			switch uwuOptions.Status {
			case 0, Finished:
				changed = false // no-op
			case Available:
				err = workUnit.ClearActiveAttempt()
				changed = true
			case Failed:
				changed = false // see below
			default:
				err = errors.New("update_work_unit cannot change finished unit")
			}
		case coordinate.Failed:
			switch uwuOptions.Status {
			case 0, Failed:
				changed = false // no-op
			case Available: // "retry"
				err = workUnit.ClearActiveAttempt()
				changed = true
			case Finished:
				// The Python worker, with two separate
				// processes, has a race wherein there
				// could be 15 seconds to go, the parent
				// kills off the child, and the child
				// finishes successfully, all at the same
				// time.  In that case the successful
				// finish should win.
				err = attempt.Finish(nil)
				changed = true
			default:
				err = errors.New("update_work_unit cannot change failed unit")
			}
		case coordinate.Retryable:
			err = errors.New("update_work_unit logic error, trying to refresh retryable unit")
		default:
			err = fmt.Errorf("update_work_unit invalid attempt status %+v", status)
		}
	}
	return changed && err == nil, "", err
}
Exemple #6
0
// TestWorkUnitPrioritySet tests two different ways of setting work unit
// priority.
func (s *Suite) TestWorkUnitPrioritySet(c *check.C) {
	var (
		err      error
		priority float64
		unit     coordinate.WorkUnit
	)
	spec, worker := s.makeWorkSpecAndWorker(c)

	unit, err = spec.AddWorkUnit("a", map[string]interface{}{}, 0.0)
	c.Assert(err, check.IsNil)
	priority, err = unit.Priority()
	c.Assert(err, check.IsNil)
	c.Check(priority, check.Equals, 0.0)

	unit, err = spec.AddWorkUnit("b", map[string]interface{}{}, 0.0)
	c.Assert(err, check.IsNil)
	err = unit.SetPriority(10.0)
	c.Assert(err, check.IsNil)
	priority, err = unit.Priority()
	c.Assert(err, check.IsNil)
	c.Check(priority, check.Equals, 10.0)

	unit, err = spec.AddWorkUnit("c", map[string]interface{}{}, 0.0)
	c.Assert(err, check.IsNil)
	err = spec.SetWorkUnitPriorities(coordinate.WorkUnitQuery{
		Names: []string{"c"},
	}, 20.0)
	c.Assert(err, check.IsNil)
	priority, err = unit.Priority()
	c.Assert(err, check.IsNil)
	c.Check(priority, check.Equals, 20.0)

	unit, err = spec.AddWorkUnit("d", map[string]interface{}{}, 0.0)
	c.Assert(err, check.IsNil)
	err = spec.AdjustWorkUnitPriorities(coordinate.WorkUnitQuery{
		Names: []string{"d"},
	}, 20.0)
	priority, err = unit.Priority()
	c.Assert(err, check.IsNil)
	c.Check(priority, check.Equals, 20.0)
	c.Assert(err, check.IsNil)
	err = spec.AdjustWorkUnitPriorities(coordinate.WorkUnitQuery{
		Names: []string{"d"},
	}, 10.0)
	c.Assert(err, check.IsNil)
	priority, err = unit.Priority()
	c.Assert(err, check.IsNil)
	c.Check(priority, check.Equals, 30.0)

	unit, err = spec.WorkUnit("b")
	c.Assert(err, check.IsNil)
	priority, err = unit.Priority()
	c.Assert(err, check.IsNil)
	c.Check(priority, check.Equals, 10.0)

	checkWorkUnitOrder(c, worker, spec, "d", "c", "b", "a")
}
Exemple #7
0
// TestChainingTwoStep separately renews an attempt to insert an output
// key, then finishes the work unit; it should still chain.
func (s *Suite) TestChainingTwoStep(c *check.C) {
	var (
		one, two coordinate.WorkSpec
		worker   coordinate.Worker
		attempts []coordinate.Attempt
		units    map[string]coordinate.WorkUnit
		unit     coordinate.WorkUnit
		data     map[string]interface{}
		priority float64
		ok       bool
		err      error
	)

	one, err = s.Namespace.SetWorkSpec(map[string]interface{}{
		"name": "one",
		"then": "two",
	})
	c.Assert(err, check.IsNil)

	two, err = s.Namespace.SetWorkSpec(map[string]interface{}{
		"name": "two",
	})
	c.Assert(err, check.IsNil)

	worker, err = s.Namespace.Worker("worker")
	c.Assert(err, check.IsNil)

	_, err = one.AddWorkUnit("a", map[string]interface{}{}, 0.0)
	c.Assert(err, check.IsNil)

	attempts, err = worker.RequestAttempts(coordinate.AttemptRequest{})
	c.Assert(err, check.IsNil)
	c.Assert(attempts, check.HasLen, 1)

	err = attempts[0].Renew(time.Duration(900)*time.Second,
		map[string]interface{}{
			"output": []interface{}{
				[]byte{1, 2, 3, 4},
				cborrpc.PythonTuple{Items: []interface{}{
					[]byte{1, 2, 3, 4},
					map[interface{}]interface{}{},
					map[interface{}]interface{}{
						"priority": 0,
					},
				}},
			},
		})
	c.Assert(err, check.IsNil)

	err = attempts[0].Finish(nil)

	units, err = two.WorkUnits(coordinate.WorkUnitQuery{})
	c.Assert(err, check.IsNil)
	c.Check(units, HasKeys, []string{"\x01\x02\x03\x04"})
	if unit, ok = units["\x01\x02\x03\x04"]; ok {
		data, err = unit.Data()
		c.Assert(err, check.IsNil)
		c.Check(data, check.DeepEquals, map[string]interface{}{})

		priority, err = unit.Priority()
		c.Assert(err, check.IsNil)
		c.Check(priority, check.Equals, 0.0)
	}
}
Exemple #8
0
// TestChainingMixed uses a combination of strings and tuples in its
// "output" data.
func (s *Suite) TestChainingMixed(c *check.C) {
	var (
		one, two coordinate.WorkSpec
		worker   coordinate.Worker
		attempts []coordinate.Attempt
		units    map[string]coordinate.WorkUnit
		unit     coordinate.WorkUnit
		data     map[string]interface{}
		priority float64
		ok       bool
		err      error
	)

	one, err = s.Namespace.SetWorkSpec(map[string]interface{}{
		"name": "one",
		"then": "two",
	})
	c.Assert(err, check.IsNil)

	two, err = s.Namespace.SetWorkSpec(map[string]interface{}{
		"name": "two",
	})
	c.Assert(err, check.IsNil)

	worker, err = s.Namespace.Worker("worker")
	c.Assert(err, check.IsNil)

	_, err = one.AddWorkUnit("a", map[string]interface{}{}, 0.0)
	c.Assert(err, check.IsNil)

	attempts, err = worker.RequestAttempts(coordinate.AttemptRequest{})
	c.Assert(err, check.IsNil)
	c.Assert(attempts, check.HasLen, 1)

	err = attempts[0].Finish(map[string]interface{}{
		"output": []interface{}{
			"key",
			cborrpc.PythonTuple{Items: []interface{}{
				"key",
				map[string]interface{}{
					"data": "x",
				},
				map[string]interface{}{
					"priority": 10.0,
				},
			}},
		},
	})
	c.Assert(err, check.IsNil)

	units, err = two.WorkUnits(coordinate.WorkUnitQuery{})
	c.Assert(err, check.IsNil)
	c.Check(units, HasKeys, []string{"key"})
	if unit, ok = units["key"]; ok {
		data, err = unit.Data()
		c.Assert(err, check.IsNil)
		c.Check(data, check.DeepEquals, map[string]interface{}{"data": "x"})

		priority, err = unit.Priority()
		c.Assert(err, check.IsNil)
		c.Check(priority, check.Equals, 10.0)
	}
}
Exemple #9
0
// TestAttemptLifetime validates a basic attempt lifetime.
func (s *Suite) TestAttemptLifetime(c *check.C) {
	var (
		err               error
		data              map[string]interface{}
		attempt, attempt2 coordinate.Attempt
		aStatus           coordinate.AttemptStatus
		spec              coordinate.WorkSpec
		unit              coordinate.WorkUnit
		worker            coordinate.Worker
		uStatus           coordinate.WorkUnitStatus
	)
	spec, worker = s.makeWorkSpecAndWorker(c)

	// Create a work unit
	unit, err = spec.AddWorkUnit("a", map[string]interface{}{}, 0.0)
	c.Assert(err, check.IsNil)

	// The work unit should be "available"
	uStatus, err = unit.Status()
	c.Assert(err, check.IsNil)
	c.Check(uStatus, check.Equals, coordinate.AvailableUnit)

	// The work unit data should be defined but empty
	data, err = unit.Data()
	c.Assert(err, check.IsNil)
	c.Check(data, check.HasLen, 0)

	// Get an attempt for it
	attempts, err := worker.RequestAttempts(coordinate.AttemptRequest{})
	c.Assert(err, check.IsNil)
	c.Assert(attempts, check.HasLen, 1)
	attempt = attempts[0]

	// The work unit should be "pending"
	uStatus, err = unit.Status()
	c.Assert(err, check.IsNil)
	c.Check(uStatus, check.Equals, coordinate.PendingUnit)

	// The attempt should be "pending" too
	aStatus, err = attempt.Status()
	c.Assert(err, check.IsNil)
	c.Check(aStatus, check.Equals, coordinate.Pending)

	// The active attempt for the unit should match this
	attempt2, err = unit.ActiveAttempt()
	c.Assert(err, check.IsNil)
	c.Check(attempt2, AttemptMatches, attempt)

	// There should be one active attempt for the worker and it should
	// also match
	attempts, err = worker.ActiveAttempts()
	c.Assert(err, check.IsNil)
	c.Check(attempts, check.HasLen, 1)
	if len(attempts) > 0 {
		c.Check(attempts[0], AttemptMatches, attempt)
	}

	// The work unit data should (still) be defined but empty
	data, err = unit.Data()
	c.Assert(err, check.IsNil)
	c.Check(data, check.HasLen, 0)

	// Now finish the attempt with some updated data
	err = attempt.Finish(map[string]interface{}{
		"outputs": []string{"yes"},
	})
	c.Assert(err, check.IsNil)

	// The unit should report "finished"
	uStatus, err = unit.Status()
	c.Assert(err, check.IsNil)
	c.Check(uStatus, check.Equals, coordinate.FinishedUnit)

	// The attempt should report "finished"
	aStatus, err = attempt.Status()
	c.Assert(err, check.IsNil)
	c.Check(aStatus, check.Equals, coordinate.Finished)

	// The attempt should still be the active attempt for the unit
	attempt2, err = unit.ActiveAttempt()
	c.Assert(err, check.IsNil)
	c.Check(attempt2, AttemptMatches, attempt)

	// The attempt should not be in the active attempt list for the worker
	attempts, err = worker.ActiveAttempts()
	c.Assert(err, check.IsNil)
	c.Check(attempts, check.HasLen, 0)

	// Both the unit and the worker should have one archived attempt
	attempts, err = unit.Attempts()
	c.Assert(err, check.IsNil)
	c.Check(attempts, check.HasLen, 1)
	if len(attempts) > 0 {
		c.Check(attempts[0], AttemptMatches, attempt)
	}

	attempts, err = worker.AllAttempts()
	c.Assert(err, check.IsNil)
	c.Check(attempts, check.HasLen, 1)
	if len(attempts) > 0 {
		c.Check(attempts[0], AttemptMatches, attempt)
	}

	// This should have updated the visible work unit data too
	data, err = unit.Data()
	c.Assert(err, check.IsNil)
	c.Check(data, check.HasLen, 1)
	c.Check(data["outputs"], check.HasLen, 1)
	c.Check(reflect.ValueOf(data["outputs"]).Index(0).Interface(),
		check.Equals, "yes")

	// For bonus points, force-clear the active attempt
	err = unit.ClearActiveAttempt()
	c.Assert(err, check.IsNil)

	// This should have pushed the unit back to available
	uStatus, err = unit.Status()
	c.Assert(err, check.IsNil)
	c.Check(uStatus, check.Equals, coordinate.AvailableUnit)

	// This also should have reset the work unit data
	data, err = unit.Data()
	c.Assert(err, check.IsNil)
	c.Check(data, check.HasLen, 0)

	// But, this should not have reset the historical attempts
	attempts, err = unit.Attempts()
	c.Assert(err, check.IsNil)
	c.Check(attempts, check.HasLen, 1)
	if len(attempts) > 0 {
		c.Check(attempts[0], AttemptMatches, attempt)
	}

	attempts, err = worker.AllAttempts()
	c.Assert(err, check.IsNil)
	c.Check(attempts, check.HasLen, 1)
	if len(attempts) > 0 {
		c.Check(attempts[0], AttemptMatches, attempt)
	}
}