func (this *MDPTransition) Next(s discrete.State, a discrete.Action) (n discrete.State) { weights := make([]float64, this.MDP.NumStates()) for n := range this.MDP.S64() { //for n := uint64(0); n < this.MDP.S(); n++ { weights[n] = this.MDP.T(s, a, n) } n = discrete.State(stat.NextChoice(weights)) return }
func (this *DirSA) Next() (n discrete.State) { if this.weights == nil { this.weights = make([]float64, len(this.counts)) for i, c := range this.counts { this.weights[i] = c / this.total } } n = discrete.State(stat.NextChoice(this.weights)) return }
func (this *Posterior) Next(s discrete.State, a discrete.Action) (n discrete.State) { c := uint64(this.C.Get(int(s))) ck := c*this.bg.NumActions + a.Hashcode() hist := this.clusterData[ck] fhist := append([]float64{}, this.bg.Beta...) total := 0.0 for i, c := range hist { fhist[i] += float64(c) total += fhist[i] } for i := range fhist { fhist[i] /= total } o := discrete.State(stat.NextChoice(fhist)) n = this.bg.OutcomeToNext(s, o) return }
func LogChoice(lws []float64) int { max := lws[0] for _, lw := range lws[1:len(lws)] { if lw > max { max = lw } } ws := make([]float64, len(lws)) var sum float64 for i, lw := range lws { ws[i] = math.Exp(lw - max) sum += ws[i] } norm := 1 / sum for i := range ws { ws[i] *= norm } return int(stat.NextChoice(ws)) }
func (this *Env) EnvStep(action rlglue.Action) (obs rlglue.Observation, r float64, t bool) { whichCan := action.Ints()[0] process := action.Ints()[1] t = true for _, can := range this.Cans { if !can.Done { t = false } } this.Log("%v ", this.Cans) if t { this.Log("finished\n\n") obs = this.makeObs() r = 0 return } can := this.Cans[whichCan] r = -1 if !can.Done { switch process { case 0: this.Log("painting can %d\n", whichCan+1) outcome := stat.NextChoice([]float64{.6, .3, .1}) switch outcome { case 0: can.Painted = true case 1: can.Painted = true can.Scratched = true case 2: } case 1: this.Log("polishing can %d\n", whichCan+1) outcome := stat.NextChoice([]float64{.2, .2, .3, .2, .1}) switch outcome { case 0: can.Painted = false case 1: can.Scratched = false case 2: can.Polished = true can.Painted = false can.Scratched = false case 3: can.Polished = true can.Painted = false case 4: } case 2: this.Log("shortcut can %d\n", whichCan+1) outcome := stat.NextChoice([]float64{0.05, 0.95}) switch outcome { case 0: can.Painted = true can.Polished = true case 1: } case 3: this.Log("finishing can %d\n", whichCan+1) if can.Painted && can.Polished && !can.Scratched && !can.Done { can.Done = true r = 10 } else { t = true r = -100000 } } this.Cans[whichCan] = can } obs = this.makeObs() return }