func (this *Environment) EnvStep(action rlglue.Action) (obs rlglue.Observation, r float64, t bool) { fps := make([]float64, len(this.status)) reboot := int(action.Ints()[0]) for i := range this.status { if reboot == i { fps[i] = 0 } else { fps[i] = this.cfg.FailBase li := (i + this.cfg.NumSystems - 1) % this.cfg.NumSystems ri := (i + 1) % this.cfg.NumSystems if !this.status[li] { fps[i] += this.cfg.FailIncr } if !this.status[ri] { fps[i] += this.cfg.FailIncr } } if this.status[i] || reboot == i { this.status[i] = stat.NextUniform() < (1 - fps[i]) } else { this.status[i] = stat.NextUniform() < (1 - this.cfg.FailStay) } if this.status[i] { r++ } } if reboot < this.cfg.NumSystems { r-- } obs = this.ConstructObs() return }
func (this *Belief) Next(c discrete.Action) (o discrete.Oracle, r float64) { visits := this.totals[c] - this.visitOffset[c] if this.M != 0 && uint64(visits) >= this.M { o = this if this.counts[c]/this.totals[c] > stat.NextUniform() { r = 1 } return } next := new(Belief) next.counts = make([]float64, len(this.counts)) copy(next.counts, this.counts) next.totals = make([]float64, len(this.totals)) copy(next.totals, this.totals) next.visitOffset = this.visitOffset prob := this.counts[c] / this.totals[c] if prob > stat.NextUniform() { r = 1 next.counts[c] += 1 } next.totals[c] += 1 next.rehash() next.M = this.M o = next return }
func (this *BetaTerminal) Next(s discrete.State, a discrete.Action) (t bool) { index := s.Hashcode() + this.NumStates*a.Hashcode() if this.Known[index] { t = this.Term[index] return } prob := this.Alpha / (this.Alpha + this.Beta) if stat.NextUniform() < prob { t = true } return }
func TestDepLL(t *testing.T) { stat.Seed(240) cfg := ConfigDefault() stateRanges := rlglue.IntRanges{rlglue.IntRange{0, 1}, rlglue.IntRange{0, 1}} actionRanges := rlglue.IntRanges{rlglue.IntRange{0, 0}} dl := NewDepLearner(0, cfg, stateRanges, actionRanges) for s := uint64(0); s < dl.bg.numStates; s++ { sv := dl.bg.stateValues[s] for a := uint64(0); a < dl.bg.numActions; a++ { for i := 0; i < 10; i++ { if (sv[0] == 0) != (stat.NextUniform() < .9) { dl = dl.Update(s, a, 1) } else { dl = dl.Update(s, a, 0) } } } } ll0 := dl.ParentSetLoglihoodRatio(ParentSet(0)) if abs(ll0-dl.mappedLoglihood) > .0001 { t.Error("incremental ll off") } ll1 := dl.ParentSetLoglihoodRatio(ParentSet(0).Insert(0)) ll2 := dl.ParentSetLoglihoodRatio(ParentSet(0).Insert(1)) ll3 := dl.ParentSetLoglihoodRatio(ParentSet(0).Insert(0).Insert(1)) if abs(ll1-ll3-1.931146) > .0001 || abs(ll2-ll3+9.941318) > .0001 { t.Error(fmt.Sprintf("got wrong lls: %f, %f", ll1-ll3, ll2-ll3)) } dl.Consider(ParentSet(0)) dl.Consider(ParentSet(0).Insert(0)) dl.Consider(ParentSet(0).Insert(1)) dl.Consider(ParentSet(0).Insert(0).Insert(1)) if !dl.parents.Contains(0) || dl.parents.Contains(1) { t.Error("Got wrong parents") } }
// flip takes one draw from stat.NextUniform and reports whether that draw is
// strictly less than p.
func flip(p float64) bool {
	u := stat.NextUniform()
	return u < p
}