func (this *Environment) EnvStep(action rlglue.Action) (obs rlglue.Observation, r float64, t bool) { fps := make([]float64, len(this.status)) reboot := int(action.Ints()[0]) for i := range this.status { if reboot == i { fps[i] = 0 } else { fps[i] = this.cfg.FailBase li := (i + this.cfg.NumSystems - 1) % this.cfg.NumSystems ri := (i + 1) % this.cfg.NumSystems if !this.status[li] { fps[i] += this.cfg.FailIncr } if !this.status[ri] { fps[i] += this.cfg.FailIncr } } if this.status[i] || reboot == i { this.status[i] = stat.NextUniform() < (1 - fps[i]) } else { this.status[i] = stat.NextUniform() < (1 - this.cfg.FailStay) } if this.status[i] { r++ } } if reboot < this.cfg.NumSystems { r-- } obs = this.ConstructObs() return }
func (this *Env) EnvStep(action rlglue.Action) (obs rlglue.Observation, r float64, t bool) { obs = this.obs var o discrete.Oracle a := discrete.Action(action.Ints()[0]) o, r = this.belief.Next(a) this.belief = o.(*Belief) t = false return }
func (ge *Env) EnvStep(action rlglue.Action) (obs rlglue.Observation, r float64, t bool) { t = ge.Pos.Equals(ge.Goal) r = -1 if t { r = 0 } dir := action.Ints()[0] u := rand.Float64() if u < .1 { dir += 1 } else if u < .2 { dir += 3 } dir %= 4 newPos := ge.Pos switch dir { case 0: newPos.Y++ case 1: newPos.X++ case 2: newPos.Y-- case 3: newPos.X-- } if newPos.X < 0 { newPos.X = 0 } if newPos.Y < 0 { newPos.Y = 0 } if newPos.X >= ge.Width { newPos.X = ge.Width - 1 } if newPos.Y >= ge.Height { newPos.Y = ge.Height - 1 } ge.Pos = newPos obs = rlglue.NewObservation([]int32{ge.Pos.X, ge.Pos.Y}, []float64{}, []byte{}) return }
func (this *Env) EnvStep(action rlglue.Action) (obs rlglue.Observation, r float64, t bool) { lastObs := this.MakeObs() println(action.Ints()[0]) switch action.Ints()[0] { case 0: this.hunter, r, t = this.hunter.TurnLeft() case 1: this.hunter, r, t = this.hunter.TurnRight() case 2: this.hunter, r, t = this.hunter.Move() case 3: this.hunter, r, t = this.hunter.Shoot() } if !t { obs = this.MakeObs() fmt.Fprintf(os.Stderr, "Sending back\n%v\n%v\n", this.hunter, this.observed) } else { obs = lastObs } return }
// getActionIndex converts an action into a single index by passing the
// action's integer components to this.task.Act.Ints.Index.
func (this *BFS3Agent) getActionIndex(act rlglue.Action) (index uint64) {
	components := act.Ints()
	index = this.task.Act.Ints.Index(components)
	return index
}
func (this *Env) EnvStep(action rlglue.Action) (obs rlglue.Observation, r float64, t bool) { whichCan := action.Ints()[0] process := action.Ints()[1] t = true for _, can := range this.Cans { if !can.Done { t = false } } this.Log("%v ", this.Cans) if t { this.Log("finished\n\n") obs = this.makeObs() r = 0 return } can := this.Cans[whichCan] r = -1 if !can.Done { switch process { case 0: this.Log("painting can %d\n", whichCan+1) outcome := stat.NextChoice([]float64{.6, .3, .1}) switch outcome { case 0: can.Painted = true case 1: can.Painted = true can.Scratched = true case 2: } case 1: this.Log("polishing can %d\n", whichCan+1) outcome := stat.NextChoice([]float64{.2, .2, .3, .2, .1}) switch outcome { case 0: can.Painted = false case 1: can.Scratched = false case 2: can.Polished = true can.Painted = false can.Scratched = false case 3: can.Polished = true can.Painted = false case 4: } case 2: this.Log("shortcut can %d\n", whichCan+1) outcome := stat.NextChoice([]float64{0.05, 0.95}) switch outcome { case 0: can.Painted = true can.Polished = true case 1: } case 3: this.Log("finishing can %d\n", whichCan+1) if can.Painted && can.Polished && !can.Scratched && !can.Done { can.Done = true r = 10 } else { t = true r = -100000 } } this.Cans[whichCan] = can } obs = this.makeObs() return }