示例#1
0
// EnvStep advances the environment by one step. The first integer of action
// is taken as the index of the system to reboot; an index that matches no
// system reboots nothing.
//
// Every system's status is resampled in index order. A system that was up,
// or that is being rebooted, stays up with probability 1 minus its failure
// probability: zero for the rebooted system, otherwise FailBase plus FailIncr
// for each ring neighbour that is currently down. A system that was down and
// is not rebooted comes back up with probability 1 - FailStay.
//
// The reward r is the number of systems that are up after resampling, minus
// one when the reboot index is below NumSystems. The terminal flag t is never
// set here.
func (this *Environment) EnvStep(action rlglue.Action) (obs rlglue.Observation, r float64, t bool) {
	target := int(action.Ints()[0])
	for idx := range this.status {
		rebooting := target == idx

		// Failure probability for this system; stays zero when rebooting.
		var failProb float64
		if !rebooting {
			failProb = this.cfg.FailBase
			left := (idx + this.cfg.NumSystems - 1) % this.cfg.NumSystems
			right := (idx + 1) % this.cfg.NumSystems
			// Statuses are updated in place, so a neighbour with a lower
			// index already carries the value resampled during this step.
			if !this.status[left] {
				failProb += this.cfg.FailIncr
			}
			if !this.status[right] {
				failProb += this.cfg.FailIncr
			}
		}

		var up bool
		if this.status[idx] || rebooting {
			up = stat.NextUniform() < (1 - failProb)
		} else {
			up = stat.NextUniform() < (1 - this.cfg.FailStay)
		}
		this.status[idx] = up
		if up {
			r++
		}
	}

	// Issuing a reboot on a valid system index costs one unit of reward.
	if target < this.cfg.NumSystems {
		r--
	}
	obs = this.ConstructObs()
	return obs, r, t
}
示例#2
0
// Next samples the outcome of taking action c under this belief. The reward r
// is 1 with probability counts[c]/totals[c] and 0 otherwise.
//
// When M is non-zero and the number of visits to c (totals[c] minus
// visitOffset[c]) has reached M, the belief is treated as fixed and the
// receiver itself is returned as the next oracle. Otherwise a new Belief is
// returned whose counts and totals are copies of the receiver's, with
// totals[c] incremented and counts[c] incremented on a reward of 1. The
// visitOffset slice is shared with the receiver, not copied.
func (this *Belief) Next(c discrete.Action) (o discrete.Oracle, r float64) {
	seen := this.totals[c] - this.visitOffset[c]

	// Exactly one uniform draw is consumed per call, whichever path is taken.
	success := this.counts[c]/this.totals[c] > stat.NextUniform()
	if success {
		r = 1
	}

	if this.M != 0 && uint64(seen) >= this.M {
		return this, r
	}

	succ := &Belief{visitOffset: this.visitOffset}
	succ.counts = make([]float64, len(this.counts))
	copy(succ.counts, this.counts)
	succ.totals = make([]float64, len(this.totals))
	copy(succ.totals, this.totals)

	if success {
		succ.counts[c] += 1
	}
	succ.totals[c] += 1

	// M is assigned only after rehash, so rehash runs while succ.M is still
	// zero. NOTE(review): confirm whether rehash depends on M.
	succ.rehash()
	succ.M = this.M
	return succ, r
}
示例#3
0
// Next reports whether taking action a in state s terminates. The pair is
// mapped to the index s.Hashcode() + NumStates*a.Hashcode(). If that index is
// marked Known, the recorded Term value is returned; otherwise the result is
// sampled, being true with probability Alpha/(Alpha+Beta).
func (this *BetaTerminal) Next(s discrete.State, a discrete.Action) (t bool) {
	key := s.Hashcode() + this.NumStates*a.Hashcode()
	if !this.Known[key] {
		termProb := this.Alpha / (this.Alpha + this.Beta)
		return stat.NextUniform() < termProb
	}
	return this.Term[key]
}
示例#4
0
// TestDepLL feeds a dependency learner seeded, noisy observations whose
// outcome is tied to the first state feature, then checks three things: the
// incrementally maintained log-likelihood agrees with the one recomputed for
// the empty parent set, the log-likelihood differences between parent sets
// match the recorded values, and after considering every candidate parent set
// the learner keeps feature 0 as a parent and not feature 1.
func TestDepLL(t *testing.T) {
	stat.Seed(240)
	cfg := ConfigDefault()
	stateRanges := rlglue.IntRanges{rlglue.IntRange{0, 1}, rlglue.IntRange{0, 1}}
	actionRanges := rlglue.IntRanges{rlglue.IntRange{0, 0}}
	dl := NewDepLearner(0, cfg, stateRanges, actionRanges)

	// dl is reassigned by Update, so the loop bounds are re-read from the
	// current learner on every iteration.
	for s := uint64(0); s < dl.bg.numStates; s++ {
		features := dl.bg.stateValues[s]
		for a := uint64(0); a < dl.bg.numActions; a++ {
			for trial := 0; trial < 10; trial++ {
				draw := stat.NextUniform() < .9
				if (features[0] == 0) == draw {
					dl = dl.Update(s, a, 0)
				} else {
					dl = dl.Update(s, a, 1)
				}
			}
		}
	}

	none := ParentSet(0)
	only0 := ParentSet(0).Insert(0)
	only1 := ParentSet(0).Insert(1)
	both := ParentSet(0).Insert(0).Insert(1)

	llNone := dl.ParentSetLoglihoodRatio(none)
	if abs(llNone-dl.mappedLoglihood) > .0001 {
		t.Error("incremental ll off")
	}

	llOnly0 := dl.ParentSetLoglihoodRatio(only0)
	llOnly1 := dl.ParentSetLoglihoodRatio(only1)
	llBoth := dl.ParentSetLoglihoodRatio(both)
	diff0 := abs(llOnly0 - llBoth - 1.931146)
	diff1 := abs(llOnly1 - llBoth + 9.941318)
	if diff0 > .0001 || diff1 > .0001 {
		t.Error(fmt.Sprintf("got wrong lls: %f, %f", llOnly0-llBoth, llOnly1-llBoth))
	}

	for _, candidate := range []ParentSet{none, only0, only1, both} {
		dl.Consider(candidate)
	}
	if !dl.parents.Contains(0) || dl.parents.Contains(1) {
		t.Error("Got wrong parents")
	}
}
示例#5
0
// flip draws one value from stat.NextUniform and reports whether that draw is
// strictly less than p.
func flip(p float64) bool {
	draw := stat.NextUniform()
	return draw < p
}