func TestFSM_UpdateAllocFromClient(t *testing.T) {
	fsm := testFSM(t)
	state := fsm.State()

	alloc := mock.Alloc()
	state.UpsertAllocs(1, []*structs.Allocation{alloc})

	clientAlloc := new(structs.Allocation)
	*clientAlloc = *alloc
	clientAlloc.ClientStatus = structs.AllocClientStatusFailed

	req := structs.AllocUpdateRequest{
		Alloc: []*structs.Allocation{clientAlloc},
	}
	buf, err := structs.Encode(structs.AllocClientUpdateRequestType, req)
	if err != nil {
		t.Fatalf("err: %v", err)
	}

	resp := fsm.Apply(makeLog(buf))
	if resp != nil {
		t.Fatalf("resp: %v", resp)
	}

	// Verify the allocation was updated
	out, err := fsm.State().AllocByID(alloc.ID)
	if err != nil {
		t.Fatalf("err: %v", err)
	}
	clientAlloc.CreateIndex = out.CreateIndex
	clientAlloc.ModifyIndex = out.ModifyIndex
	if !reflect.DeepEqual(clientAlloc, out) {
		t.Fatalf("bad: %#v %#v", clientAlloc, out)
	}
}
// setAlloc is used to update the allocation of the runner. The existing
// client status and description are preserved.
func (r *AllocRunner) setAlloc(alloc *structs.Allocation) {
	if r.alloc != nil {
		alloc.ClientStatus = r.alloc.ClientStatus
		alloc.ClientDescription = r.alloc.ClientDescription
	}
	r.alloc = alloc
}
func TestClient_UpdateAllocStatus(t *testing.T) {
	s1, _ := testServer(t, nil)
	defer s1.Shutdown()
	testutil.WaitForLeader(t, s1.RPC)

	c1 := testClient(t, func(c *config.Config) {
		c.RPCHandler = s1
	})
	defer c1.Shutdown()

	alloc := mock.Alloc()
	alloc.NodeID = c1.Node().ID

	state := s1.State()
	state.UpsertAllocs(100, []*structs.Allocation{alloc})

	newAlloc := new(structs.Allocation)
	*newAlloc = *alloc
	newAlloc.ClientStatus = structs.AllocClientStatusRunning

	err := c1.updateAllocStatus(newAlloc)
	if err != nil {
		t.Fatalf("err: %v", err)
	}

	out, err := state.AllocByID(alloc.ID)
	if err != nil {
		t.Fatalf("err: %v", err)
	}

	if out == nil || out.ClientStatus != structs.AllocClientStatusRunning {
		t.Fatalf("bad: %#v", out)
	}
}
func TestClientEndpoint_UpdateAlloc(t *testing.T) {
	s1 := testServer(t, nil)
	defer s1.Shutdown()
	codec := rpcClient(t, s1)
	testutil.WaitForLeader(t, s1.RPC)

	// Create the register request
	node := mock.Node()
	reg := &structs.NodeRegisterRequest{
		Node:         node,
		WriteRequest: structs.WriteRequest{Region: "global"},
	}

	// Fetch the response
	var resp structs.GenericResponse
	if err := msgpackrpc.CallWithCodec(codec, "Node.Register", reg, &resp); err != nil {
		t.Fatalf("err: %v", err)
	}

	// Inject a fake allocation
	alloc := mock.Alloc()
	alloc.NodeID = node.ID
	state := s1.fsm.State()
	state.UpsertJobSummary(99, mock.JobSummary(alloc.JobID))
	err := state.UpsertAllocs(100, []*structs.Allocation{alloc})
	if err != nil {
		t.Fatalf("err: %v", err)
	}

	// Attempt update
	clientAlloc := new(structs.Allocation)
	*clientAlloc = *alloc
	clientAlloc.ClientStatus = structs.AllocClientStatusFailed

	// Update the alloc
	update := &structs.AllocUpdateRequest{
		Alloc:        []*structs.Allocation{clientAlloc},
		WriteRequest: structs.WriteRequest{Region: "global"},
	}
	var resp2 structs.NodeAllocsResponse
	start := time.Now()
	if err := msgpackrpc.CallWithCodec(codec, "Node.UpdateAlloc", update, &resp2); err != nil {
		t.Fatalf("err: %v", err)
	}
	if resp2.Index == 0 {
		t.Fatalf("Bad index: %d", resp2.Index)
	}
	if diff := time.Since(start); diff < batchUpdateInterval {
		t.Fatalf("too fast: %v", diff)
	}

	// Lookup the alloc
	out, err := state.AllocByID(alloc.ID)
	if err != nil {
		t.Fatalf("err: %v", err)
	}
	if out.ClientStatus != structs.AllocClientStatusFailed {
		t.Fatalf("Bad: %#v", out)
	}
}
// UpdateAllocFromClient is used to update an allocation based on input
// from a client. While the schedulers are the authority on the allocation for
// most things, some updates are authoritative from the client. Specifically,
// the desired state comes from the schedulers, while the actual state comes
// from clients.
func (s *StateStore) UpdateAllocFromClient(index uint64, alloc *structs.Allocation) error {
	txn := s.db.Txn(true)
	defer txn.Abort()

	watcher := watch.NewItems()
	watcher.Add(watch.Item{Table: "allocs"})
	watcher.Add(watch.Item{Alloc: alloc.ID})
	watcher.Add(watch.Item{AllocEval: alloc.EvalID})
	watcher.Add(watch.Item{AllocJob: alloc.JobID})
	watcher.Add(watch.Item{AllocNode: alloc.NodeID})

	// Look for existing alloc
	existing, err := txn.First("allocs", "id", alloc.ID)
	if err != nil {
		return fmt.Errorf("alloc lookup failed: %v", err)
	}

	// Nothing to do if this does not exist
	if existing == nil {
		return nil
	}
	exist := existing.(*structs.Allocation)

	// Copy everything from the existing allocation
	copyAlloc := new(structs.Allocation)
	*copyAlloc = *exist

	// Pull in anything the client is the authority on
	copyAlloc.ClientStatus = alloc.ClientStatus
	copyAlloc.ClientDescription = alloc.ClientDescription
	copyAlloc.TaskStates = alloc.TaskStates

	// Update the modify index
	copyAlloc.ModifyIndex = index

	// Update the allocation
	if err := txn.Insert("allocs", copyAlloc); err != nil {
		return fmt.Errorf("alloc insert failed: %v", err)
	}

	// Update the indexes
	if err := txn.Insert("index", &IndexEntry{"allocs", index}); err != nil {
		return fmt.Errorf("index update failed: %v", err)
	}

	// Set the job's status
	forceStatus := ""
	if !copyAlloc.TerminalStatus() {
		forceStatus = structs.JobStatusRunning
	}
	jobs := map[string]string{alloc.JobID: forceStatus}
	if err := s.setJobStatuses(index, watcher, txn, jobs, false); err != nil {
		return fmt.Errorf("setting job status failed: %v", err)
	}

	txn.Defer(func() { s.watch.notify(watcher) })
	txn.Commit()
	return nil
}
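// UpdateAllocFromClient above copies the server's allocation and then
// overwrites only the fields the client is authoritative on. The sketch
// below isolates that merge pattern with plain structs; the Alloc type and
// the mergeClientFields helper are assumptions made for the example, not
// Nomad types.
package main

import "fmt"

// Alloc carries a mix of scheduler-owned and client-owned fields.
type Alloc struct {
	ID                string
	DesiredStatus     string // owned by the schedulers
	ClientStatus      string // owned by the client
	ClientDescription string // owned by the client
	ModifyIndex       uint64
}

// mergeClientFields returns a copy of the server's allocation with only the
// client-reported fields and the new modify index applied.
func mergeClientFields(server, client *Alloc, index uint64) *Alloc {
	merged := new(Alloc)
	*merged = *server
	merged.ClientStatus = client.ClientStatus
	merged.ClientDescription = client.ClientDescription
	merged.ModifyIndex = index
	return merged
}

func main() {
	server := &Alloc{ID: "a1", DesiredStatus: "run", ClientStatus: "pending", ModifyIndex: 1000}
	client := &Alloc{ID: "a1", ClientStatus: "failed", ClientDescription: "task exited"}

	out := mergeClientFields(server, client, 1001)
	fmt.Printf("%+v\n", out) // DesiredStatus preserved, ClientStatus updated
}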
func TestClientEndpoint_BatchUpdate(t *testing.T) {
	s1 := testServer(t, nil)
	defer s1.Shutdown()
	codec := rpcClient(t, s1)
	testutil.WaitForLeader(t, s1.RPC)

	// Create the register request
	node := mock.Node()
	reg := &structs.NodeRegisterRequest{
		Node:         node,
		WriteRequest: structs.WriteRequest{Region: "global"},
	}

	// Fetch the response
	var resp structs.GenericResponse
	if err := msgpackrpc.CallWithCodec(codec, "Node.Register", reg, &resp); err != nil {
		t.Fatalf("err: %v", err)
	}

	// Inject a fake allocation
	alloc := mock.Alloc()
	alloc.NodeID = node.ID
	state := s1.fsm.State()
	state.UpsertJobSummary(99, mock.JobSummary(alloc.JobID))
	err := state.UpsertAllocs(100, []*structs.Allocation{alloc})
	if err != nil {
		t.Fatalf("err: %v", err)
	}

	// Attempt update
	clientAlloc := new(structs.Allocation)
	*clientAlloc = *alloc
	clientAlloc.ClientStatus = structs.AllocClientStatusFailed

	// Call to do the batch update
	bf := NewBatchFuture()
	endpoint := s1.endpoints.Node
	endpoint.batchUpdate(bf, []*structs.Allocation{clientAlloc})
	if err := bf.Wait(); err != nil {
		t.Fatalf("err: %v", err)
	}
	if bf.Index() == 0 {
		t.Fatalf("Bad index: %d", bf.Index())
	}

	// Lookup the alloc
	out, err := state.AllocByID(alloc.ID)
	if err != nil {
		t.Fatalf("err: %v", err)
	}
	if out.ClientStatus != structs.AllocClientStatusFailed {
		t.Fatalf("Bad: %#v", out)
	}
}
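// The bf.Wait() / bf.Index() calls in the test above follow a simple "batch
// future" pattern: several callers contribute allocations to one batch and
// then block until that single batched write commits. The sketch below is a
// generic, self-contained illustration of such a future, not Nomad's
// BatchFuture implementation; the names batchFuture, Respond, Wait, and
// Index are assumptions made for the example.
package main

import "fmt"

// batchFuture lets multiple waiters block until one batched write finishes.
type batchFuture struct {
	doneCh chan struct{}
	err    error
	index  uint64
}

func newBatchFuture() *batchFuture {
	return &batchFuture{doneCh: make(chan struct{})}
}

// Respond records the result of the batched write and releases all waiters.
func (b *batchFuture) Respond(index uint64, err error) {
	b.index = index
	b.err = err
	close(b.doneCh)
}

// Wait blocks until Respond has been called and returns the write error.
func (b *batchFuture) Wait() error {
	<-b.doneCh
	return b.err
}

// Index returns the commit index of the batched write; valid after Wait.
func (b *batchFuture) Index() uint64 {
	return b.index
}

func main() {
	bf := newBatchFuture()
	go bf.Respond(100, nil) // the batch commit happens elsewhere
	if err := bf.Wait(); err != nil {
		fmt.Println("err:", err)
	}
	fmt.Println("committed at index", bf.Index())
}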
// updateAllocStatus is used to update the status of an allocation
func (c *Client) updateAllocStatus(alloc *structs.Allocation) {
	// Only send the fields that are updatable by the client.
	stripped := new(structs.Allocation)
	stripped.ID = alloc.ID
	stripped.NodeID = c.Node().ID
	stripped.TaskStates = alloc.TaskStates
	stripped.ClientStatus = alloc.ClientStatus
	stripped.ClientDescription = alloc.ClientDescription
	select {
	case c.allocUpdates <- stripped:
	case <-c.shutdownCh:
	}
}
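// The stripped allocations pushed onto c.allocUpdates above are not sent to
// the servers one at a time; the client coalesces them and flushes them in
// periodic batches. The sketch below is a self-contained illustration of
// that coalescing pattern, not the actual Nomad sync loop: the Update type,
// the 50ms interval, and the sendBatch callback are assumptions made for
// the example.
package main

import (
	"fmt"
	"time"
)

// Update stands in for the stripped allocation fields a client reports.
type Update struct {
	ID     string
	Status string
}

// coalesce drains updates from ch, keeping only the most recent update per
// ID, and flushes the pending batch on every tick until ch is closed.
func coalesce(ch <-chan Update, interval time.Duration, sendBatch func([]Update)) {
	pending := make(map[string]Update)
	ticker := time.NewTicker(interval)
	defer ticker.Stop()

	flush := func() {
		if len(pending) == 0 {
			return
		}
		batch := make([]Update, 0, len(pending))
		for _, u := range pending {
			batch = append(batch, u)
		}
		sendBatch(batch)
		pending = make(map[string]Update)
	}

	for {
		select {
		case u, ok := <-ch:
			if !ok {
				flush()
				return
			}
			// Later updates for the same allocation replace earlier ones.
			pending[u.ID] = u
		case <-ticker.C:
			flush()
		}
	}
}

func main() {
	ch := make(chan Update)
	done := make(chan struct{})
	go func() {
		coalesce(ch, 50*time.Millisecond, func(batch []Update) {
			fmt.Printf("flushing %d update(s)\n", len(batch))
		})
		close(done)
	}()

	ch <- Update{ID: "a", Status: "running"}
	ch <- Update{ID: "a", Status: "failed"} // supersedes the previous update
	close(ch)
	<-done
}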
// nestedUpdateAllocFromClient is used to nest an update of an allocation with client status
func (s *StateStore) nestedUpdateAllocFromClient(txn *memdb.Txn, watcher watch.Items, index uint64, alloc *structs.Allocation) error {
	// Look for existing alloc
	existing, err := txn.First("allocs", "id", alloc.ID)
	if err != nil {
		return fmt.Errorf("alloc lookup failed: %v", err)
	}

	// Nothing to do if this does not exist
	if existing == nil {
		return nil
	}
	exist := existing.(*structs.Allocation)

	// Trigger the watcher
	watcher.Add(watch.Item{Alloc: alloc.ID})
	watcher.Add(watch.Item{AllocEval: exist.EvalID})
	watcher.Add(watch.Item{AllocJob: exist.JobID})
	watcher.Add(watch.Item{AllocNode: exist.NodeID})

	// Copy everything from the existing allocation
	copyAlloc := new(structs.Allocation)
	*copyAlloc = *exist

	// Pull in anything the client is the authority on
	copyAlloc.ClientStatus = alloc.ClientStatus
	copyAlloc.ClientDescription = alloc.ClientDescription
	copyAlloc.TaskStates = alloc.TaskStates

	// Update the modify index
	copyAlloc.ModifyIndex = index

	if err := s.updateSummaryWithAlloc(index, copyAlloc, exist, watcher, txn); err != nil {
		return fmt.Errorf("error updating job summary: %v", err)
	}

	// Update the allocation
	if err := txn.Insert("allocs", copyAlloc); err != nil {
		return fmt.Errorf("alloc insert failed: %v", err)
	}

	// Set the job's status
	forceStatus := ""
	if !copyAlloc.TerminalStatus() {
		forceStatus = structs.JobStatusRunning
	}
	jobs := map[string]string{exist.JobID: forceStatus}
	if err := s.setJobStatuses(index, watcher, txn, jobs, false); err != nil {
		return fmt.Errorf("setting job status failed: %v", err)
	}
	return nil
}
func TestStateStore_UpdateAllocFromClient(t *testing.T) {
	state := testStateStore(t)
	alloc := mock.Alloc()

	notify := setupNotifyTest(
		state,
		watch.Item{Table: "allocs"},
		watch.Item{Alloc: alloc.ID},
		watch.Item{AllocEval: alloc.EvalID},
		watch.Item{AllocJob: alloc.JobID},
		watch.Item{AllocNode: alloc.NodeID})

	err := state.UpsertAllocs(1000, []*structs.Allocation{alloc})
	if err != nil {
		t.Fatalf("err: %v", err)
	}

	update := new(structs.Allocation)
	*update = *alloc
	update.ClientStatus = structs.AllocClientStatusFailed

	err = state.UpdateAllocFromClient(1001, update)
	if err != nil {
		t.Fatalf("err: %v", err)
	}

	out, err := state.AllocByID(alloc.ID)
	if err != nil {
		t.Fatalf("err: %v", err)
	}

	update.ModifyIndex = 1001
	if !reflect.DeepEqual(update, out) {
		t.Fatalf("bad: %#v %#v", update, out)
	}

	index, err := state.Index("allocs")
	if err != nil {
		t.Fatalf("err: %v", err)
	}
	if index != 1001 {
		t.Fatalf("bad: %d", index)
	}

	notify.verify(t)
}
// UpdateAllocFromClient is used to update an allocation based on input
// from a client. While the schedulers are the authority on the allocation for
// most things, some updates are authoritative from the client. Specifically,
// the desired state comes from the schedulers, while the actual state comes
// from clients.
func (s *StateStore) UpdateAllocFromClient(index uint64, alloc *structs.Allocation) error {
	txn := s.db.Txn(true)
	defer txn.Abort()

	// Look for existing alloc
	existing, err := txn.First("allocs", "id", alloc.ID)
	if err != nil {
		return fmt.Errorf("alloc lookup failed: %v", err)
	}

	// Nothing to do if this does not exist
	if existing == nil {
		return nil
	}
	exist := existing.(*structs.Allocation)

	// Copy everything from the existing allocation
	copyAlloc := new(structs.Allocation)
	*copyAlloc = *exist

	// Pull in anything the client is the authority on
	copyAlloc.ClientStatus = alloc.ClientStatus
	copyAlloc.ClientDescription = alloc.ClientDescription

	// Update the modify index
	copyAlloc.ModifyIndex = index

	// Update the allocation
	if err := txn.Insert("allocs", copyAlloc); err != nil {
		return fmt.Errorf("alloc insert failed: %v", err)
	}

	// Update the indexes
	if err := txn.Insert("index", &IndexEntry{"allocs", index}); err != nil {
		return fmt.Errorf("index update failed: %v", err)
	}

	nodes := map[string]struct{}{alloc.NodeID: struct{}{}}
	txn.Defer(func() { s.watch.notifyAllocs(nodes) })
	txn.Commit()
	return nil
}
func TestStateStore_UpdateAllocFromClient(t *testing.T) {
	state := testStateStore(t)
	alloc := mock.Alloc()

	err := state.UpsertAllocs(1000, []*structs.Allocation{alloc})
	if err != nil {
		t.Fatalf("err: %v", err)
	}

	update := new(structs.Allocation)
	*update = *alloc
	update.ClientStatus = structs.AllocClientStatusFailed

	err = state.UpdateAllocFromClient(1001, update)
	if err != nil {
		t.Fatalf("err: %v", err)
	}

	out, err := state.AllocByID(alloc.ID)
	if err != nil {
		t.Fatalf("err: %v", err)
	}

	update.ModifyIndex = 1001
	if !reflect.DeepEqual(update, out) {
		t.Fatalf("bad: %#v %#v", update, out)
	}

	index, err := state.Index("allocs")
	if err != nil {
		t.Fatalf("err: %v", err)
	}
	if index != 1001 {
		t.Fatalf("bad: %d", index)
	}
}
// inplaceUpdate attempts to update allocations in-place where possible.
func inplaceUpdate(ctx Context, eval *structs.Evaluation, job *structs.Job, stack Stack, updates []allocTuple) []allocTuple {
	n := len(updates)
	inplace := 0
	for i := 0; i < n; i++ {
		// Get the update
		update := updates[i]

		// Check if the task drivers or config have changed; if so, a rolling
		// upgrade is required since that cannot be done in-place.
		existing := update.Alloc.Job.LookupTaskGroup(update.TaskGroup.Name)
		if tasksUpdated(update.TaskGroup, existing) {
			continue
		}

		// Get the existing node
		node, err := ctx.State().NodeByID(update.Alloc.NodeID)
		if err != nil {
			ctx.Logger().Printf("[ERR] sched: %#v failed to get node '%s': %v",
				eval, update.Alloc.NodeID, err)
			continue
		}
		if node == nil {
			continue
		}

		// Set the existing node as the base set
		stack.SetNodes([]*structs.Node{node})

		// Stage an eviction of the current allocation. This is done so that
		// the current allocation is discounted when checking for feasibility.
		// Otherwise we would be trying to fit the task's current resources and
		// updated resources. After select is called we can remove the evict.
		ctx.Plan().AppendUpdate(update.Alloc, structs.AllocDesiredStatusStop, allocInPlace)

		// Attempt to match the task group
		option, size := stack.Select(update.TaskGroup)

		// Pop the allocation
		ctx.Plan().PopUpdate(update.Alloc)

		// Skip if we could not do an in-place update
		if option == nil {
			continue
		}

		// Restore the network offers from the existing allocation.
		// We do not allow network resources (reserved/dynamic ports)
		// to be updated. This is guarded in taskUpdated, so we can
		// safely restore those here.
		for task, resources := range option.TaskResources {
			existing := update.Alloc.TaskResources[task]
			resources.Networks = existing.Networks
		}

		// Create a shallow copy
		newAlloc := new(structs.Allocation)
		*newAlloc = *update.Alloc

		// Update the allocation
		newAlloc.EvalID = eval.ID
		newAlloc.Job = job
		newAlloc.Resources = size
		newAlloc.TaskResources = option.TaskResources
		newAlloc.Metrics = ctx.Metrics()
		newAlloc.DesiredStatus = structs.AllocDesiredStatusRun
		newAlloc.ClientStatus = structs.AllocClientStatusPending
		ctx.Plan().AppendAlloc(newAlloc)

		// Remove this allocation from the slice
		updates[i] = updates[n-1]
		i--
		n--
		inplace++
	}
	if len(updates) > 0 {
		ctx.Logger().Printf("[DEBUG] sched: %#v: %d in-place updates of %d",
			eval, inplace, len(updates))
	}
	return updates[:n]
}
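// inplaceUpdate above removes handled entries with the swap-and-shrink
// trick: the handled element is overwritten with the last live element and
// the logical length is reduced, avoiding any new allocation. The sketch
// below shows the same trick on a plain int slice; filterInPlace is an
// illustration of the pattern, not scheduler code.
package main

import "fmt"

// filterInPlace keeps only the elements for which keep returns true,
// without preserving order and without allocating a new slice.
func filterInPlace(xs []int, keep func(int) bool) []int {
	n := len(xs)
	for i := 0; i < n; i++ {
		if keep(xs[i]) {
			continue
		}
		// Swap the rejected element with the last live one and shrink.
		xs[i] = xs[n-1]
		i--
		n--
	}
	return xs[:n]
}

func main() {
	xs := []int{1, 2, 3, 4, 5, 6}
	// Prints the even values only; order is not preserved.
	fmt.Println(filterInPlace(xs, func(x int) bool { return x%2 == 0 }))
}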
func TestFSM_UpdateAllocFromClient(t *testing.T) {
	fsm := testFSM(t)
	fsm.blockedEvals.SetEnabled(true)
	state := fsm.State()

	node := mock.Node()
	state.UpsertNode(1, node)

	// Mark an eval as blocked.
	eval := mock.Eval()
	eval.ClassEligibility = map[string]bool{node.ComputedClass: true}
	fsm.blockedEvals.Block(eval)

	bStats := fsm.blockedEvals.Stats()
	if bStats.TotalBlocked != 1 {
		t.Fatalf("bad: %#v", bStats)
	}

	// Create an alloc on the node and mark it dead from the client's side
	alloc := mock.Alloc()
	alloc.NodeID = node.ID
	state.UpsertAllocs(1, []*structs.Allocation{alloc})

	clientAlloc := new(structs.Allocation)
	*clientAlloc = *alloc
	clientAlloc.ClientStatus = structs.AllocClientStatusDead

	req := structs.AllocUpdateRequest{
		Alloc: []*structs.Allocation{clientAlloc},
	}
	buf, err := structs.Encode(structs.AllocClientUpdateRequestType, req)
	if err != nil {
		t.Fatalf("err: %v", err)
	}

	resp := fsm.Apply(makeLog(buf))
	if resp != nil {
		t.Fatalf("resp: %v", resp)
	}

	// Verify the allocation was updated
	out, err := fsm.State().AllocByID(alloc.ID)
	if err != nil {
		t.Fatalf("err: %v", err)
	}
	clientAlloc.CreateIndex = out.CreateIndex
	clientAlloc.ModifyIndex = out.ModifyIndex
	if !reflect.DeepEqual(clientAlloc, out) {
		t.Fatalf("bad: %#v %#v", clientAlloc, out)
	}

	// Verify the eval was unblocked.
	testutil.WaitForResult(func() (bool, error) {
		bStats = fsm.blockedEvals.Stats()
		if bStats.TotalBlocked != 0 {
			return false, fmt.Errorf("bad: %#v %#v", bStats, out)
		}
		return true, nil
	}, func(err error) {
		t.Fatalf("err: %s", err)
	})
}