示例#1
0
文件: mdp.go 项目: postfix/rlbayes
// Next samples a successor state for taking action a in state s.
//
// It builds one weight per state from this.MDP.T(s, a, next) (presumably the
// transition probability; confirm against the MDP implementation) and draws
// an index from those weights with stat.NextChoice.
func (this *MDPTransition) Next(s discrete.State, a discrete.Action) (n discrete.State) {
	probs := make([]float64, this.MDP.NumStates())
	for next := range this.MDP.S64() {
		probs[next] = this.MDP.T(s, a, next)
	}
	return discrete.State(stat.NextChoice(probs))
}
示例#2
0
// Next samples a state index in proportion to the stored counts.
//
// The normalized weights (count / total) are computed on the first call, when
// this.weights is still nil, and cached on the receiver for later calls.
func (this *DirSA) Next() (n discrete.State) {
	if this.weights == nil {
		normalized := make([]float64, len(this.counts))
		for idx := range this.counts {
			normalized[idx] = this.counts[idx] / this.total
		}
		this.weights = normalized
	}
	return discrete.State(stat.NextChoice(this.weights))
}
示例#3
0
// Next samples a successor state for taking action a in state s.
//
// The cluster assigned to s is read from this.C and combined with the action
// hash to select an outcome histogram from this.clusterData. The histogram
// counts are added to a copy of the prior this.bg.Beta, the result is
// normalized into a probability vector, an outcome is drawn from it with
// stat.NextChoice, and the outcome is translated into the next state by
// this.bg.OutcomeToNext.
func (this *Posterior) Next(s discrete.State, a discrete.Action) (n discrete.State) {
	c := uint64(this.C.Get(int(s)))
	ck := c*this.bg.NumActions + a.Hashcode()
	hist := this.clusterData[ck]

	// Work on a copy so the shared prior slice is never mutated.
	fhist := append([]float64{}, this.bg.Beta...)
	for i, count := range hist {
		fhist[i] += float64(count)
	}

	// Normalize over every outcome rather than only the entries present in
	// hist: if hist is empty or shorter than the prior, the prior mass of the
	// remaining outcomes must still be part of the total, otherwise the
	// weights do not sum to one (or are divided by zero).
	total := 0.0
	for _, w := range fhist {
		total += w
	}
	for i := range fhist {
		fhist[i] /= total
	}

	o := discrete.State(stat.NextChoice(fhist))
	return this.bg.OutcomeToNext(s, o)
}
示例#4
0
文件: util.go 项目: postfix/rlbayes
// LogChoice samples an index from a distribution given as unnormalized
// log-weights.
//
// The largest log-weight is subtracted before exponentiating, so every
// exponent is at most zero and math.Exp cannot overflow. The exponentiated
// weights are then scaled to sum to one and passed to stat.NextChoice.
// lws must be non-empty; indexing lws[0] panics otherwise.
func LogChoice(lws []float64) int {
	peak := lws[0]
	for i := 1; i < len(lws); i++ {
		if lws[i] > peak {
			peak = lws[i]
		}
	}

	probs := make([]float64, len(lws))
	total := 0.0
	for i := range lws {
		probs[i] = math.Exp(lws[i] - peak)
		total += probs[i]
	}

	scale := 1 / total
	for i := range probs {
		probs[i] *= scale
	}
	return int(stat.NextChoice(probs))
}
示例#5
0
// EnvStep applies one action to the environment and returns the resulting
// observation, the reward r and the termination flag t.
//
// action.Ints()[0] selects the can and action.Ints()[1] the process:
// 0 paints, 1 polishes, 2 tries the shortcut and 3 finishes the can. The
// random effect of processes 0-2 is drawn with stat.NextChoice.
//
// If every can is already done the step terminates with reward 0. Otherwise
// the step costs -1, except that successfully finishing a can yields 10 and
// finishing a can that is not painted, polished and unscratched ends the
// episode with reward -100000. Acting on a can that is already done leaves
// it unchanged.
func (this *Env) EnvStep(action rlglue.Action) (obs rlglue.Observation, r float64, t bool) {
	args := action.Ints()
	canIdx, op := args[0], args[1]

	// The episode is over once no unfinished can remains.
	t = true
	for _, c := range this.Cans {
		if !c.Done {
			t = false
			break
		}
	}
	this.Log("%v ", this.Cans)
	if t {
		this.Log("finished\n\n")
		return this.makeObs(), 0, true
	}

	r = -1
	can := this.Cans[canIdx]
	if can.Done {
		// A finished can is left untouched; the step still costs -1.
		return this.makeObs(), r, t
	}

	switch op {
	case 0:
		this.Log("painting can %d\n", canIdx+1)
		// Outcome 2 (weight .1) leaves the can unchanged.
		switch stat.NextChoice([]float64{.6, .3, .1}) {
		case 0:
			can.Painted = true
		case 1:
			can.Painted, can.Scratched = true, true
		}
	case 1:
		this.Log("polishing can %d\n", canIdx+1)
		// Outcome 4 (weight .1) leaves the can unchanged.
		switch stat.NextChoice([]float64{.2, .2, .3, .2, .1}) {
		case 0:
			can.Painted = false
		case 1:
			can.Scratched = false
		case 2:
			can.Polished = true
			can.Painted, can.Scratched = false, false
		case 3:
			can.Polished = true
			can.Painted = false
		}
	case 2:
		this.Log("shortcut can %d\n", canIdx+1)
		// Outcome 1 (weight 0.95) leaves the can unchanged.
		if stat.NextChoice([]float64{0.05, 0.95}) == 0 {
			can.Painted, can.Polished = true, true
		}
	case 3:
		this.Log("finishing can %d\n", canIdx+1)
		// can.Done is known to be false here because of the guard above.
		if ready := can.Painted && can.Polished && !can.Scratched; ready {
			can.Done = true
			r = 10
		} else {
			t = true
			r = -100000
		}
	}

	this.Cans[canIdx] = can
	return this.makeObs(), r, t
}