// loadCheckState is used to restore the persisted state of a check. func (a *Agent) loadCheckState(check *structs.HealthCheck) error { // Try to read the persisted state for this check file := filepath.Join(a.config.DataDir, checkStateDir, stringHash(check.CheckID)) buf, err := ioutil.ReadFile(file) if err != nil { if os.IsNotExist(err) { return nil } return fmt.Errorf("failed reading file %q: %s", file, err) } // Decode the state data var p persistedCheckState if err := json.Unmarshal(buf, &p); err != nil { return fmt.Errorf("failed decoding check state: %s", err) } // Check if the state has expired if time.Now().Unix() >= p.Expires { a.logger.Printf("[DEBUG] agent: check state expired for %q, not restoring", check.CheckID) return a.purgeCheckState(check.CheckID) } // Restore the fields from the state check.Output = p.Output check.Status = p.Status return nil }
func TestAgentAntiEntropy_Checks(t *testing.T) { conf := nextConfig() dir, agent := makeAgent(t, conf) defer os.RemoveAll(dir) defer agent.Shutdown() testutil.WaitForLeader(t, agent.RPC, "dc1") // Register info args := &structs.RegisterRequest{ Datacenter: "dc1", Node: agent.config.NodeName, Address: "127.0.0.1", } // Exists both, same (noop) var out struct{} chk1 := &structs.HealthCheck{ Node: agent.config.NodeName, CheckID: "mysql", Name: "mysql", Status: structs.HealthPassing, } agent.state.AddCheck(chk1, "") args.Check = chk1 if err := agent.RPC("Catalog.Register", args, &out); err != nil { t.Fatalf("err: %v", err) } // Exists both, different (update) chk2 := &structs.HealthCheck{ Node: agent.config.NodeName, CheckID: "redis", Name: "redis", Status: structs.HealthPassing, } agent.state.AddCheck(chk2, "") chk2_mod := new(structs.HealthCheck) *chk2_mod = *chk2 chk2_mod.Status = structs.HealthCritical args.Check = chk2_mod if err := agent.RPC("Catalog.Register", args, &out); err != nil { t.Fatalf("err: %v", err) } // Exists local (create) chk3 := &structs.HealthCheck{ Node: agent.config.NodeName, CheckID: "web", Name: "web", Status: structs.HealthPassing, } agent.state.AddCheck(chk3, "") // Exists remote (delete) chk4 := &structs.HealthCheck{ Node: agent.config.NodeName, CheckID: "lb", Name: "lb", Status: structs.HealthPassing, } args.Check = chk4 if err := agent.RPC("Catalog.Register", args, &out); err != nil { t.Fatalf("err: %v", err) } // Exists local, in sync, remote missing (create) chk5 := &structs.HealthCheck{ Node: agent.config.NodeName, CheckID: "cache", Name: "cache", Status: structs.HealthPassing, } agent.state.AddCheck(chk5, "") agent.state.checkStatus["cache"] = syncStatus{inSync: true} // Trigger anti-entropy run and wait agent.StartSync() time.Sleep(200 * time.Millisecond) // Verify that we are in sync req := structs.NodeSpecificRequest{ Datacenter: "dc1", Node: agent.config.NodeName, } var checks structs.IndexedHealthChecks if err := agent.RPC("Health.NodeChecks", &req, &checks); err != nil { t.Fatalf("err: %v", err) } // We should have 5 checks (serf included) if len(checks.HealthChecks) != 5 { t.Fatalf("bad: %v", checks) } // All the checks should match for _, chk := range checks.HealthChecks { switch chk.CheckID { case "mysql": if !reflect.DeepEqual(chk, chk1) { t.Fatalf("bad: %v %v", chk, chk1) } case "redis": if !reflect.DeepEqual(chk, chk2) { t.Fatalf("bad: %v %v", chk, chk2) } case "web": if !reflect.DeepEqual(chk, chk3) { t.Fatalf("bad: %v %v", chk, chk3) } case "cache": if !reflect.DeepEqual(chk, chk5) { t.Fatalf("bad: %v %v", chk, chk5) } case "serfHealth": // ignore default: t.Fatalf("unexpected check: %v", chk) } } // Check the local state if len(agent.state.checks) != 4 { t.Fatalf("bad: %v", agent.state.checks) } if len(agent.state.checkStatus) != 4 { t.Fatalf("bad: %v", agent.state.checkStatus) } for name, status := range agent.state.checkStatus { if !status.inSync { t.Fatalf("should be in sync: %v %v", name, status) } } }