func (this *BFS3Agent) getAction() (index discrete.Action) { if this.fs3.Dump { println("getAction") } if this.Cfg.ReplanEachStep { this.ResetPlanner() } if this.fs3 == nil { index = discrete.Action(stat.NextRange(int64(this.task.Act.Ints.Count()))) return } node := this.fs3.GetNode(this.stepsWithPlanner, this.belief) var expanded uint64 for i := uint64(0); i < this.Cfg.MaxTrajectories; i++ { expandedThisTime := this.fs3.RunTrajectory(node, this.Cfg.Depth) expanded += expandedThisTime if this.Cfg.Budget != 0 && expanded > this.Cfg.Budget { break } } if this.Dump { this.fs3.Dump = true fmt.Printf("root:\n%v\n\n", node) this.fs3.RunTrajectory(node, this.Cfg.Depth) } index = discrete.Action(this.fs3.GetAction(node)) fmt.Fprintf(os.Stderr, "%v\n", this.fs3.GetQs(node)) if !this.Cfg.FS3.Shallow { this.fs3.ClearLevel(this.stepsWithPlanner) this.stepsWithPlanner++ } time.Sleep(1e9) return }
func (ra *RmaxFSSSAgent) GetAction() (action discrete.Action) { if ra.s == nil { action = discrete.Action(stat.NextRange(int64(ra.task.Act.Ints.Count()))) return } node := ra.s.GetNode(ra.stepsWithPlanner, ra.mdpo) action = discrete.Action(ra.s.GetAction(node)) ra.s.ClearLevel(ra.stepsWithPlanner) return }
func (ra *RmaxFSSSAgent) AgentStart(obs rlglue.Observation) (act rlglue.Action) { ra.stepsWithPlanner = 0 ra.lastState = discrete.State(ra.task.Obs.Ints.Index(obs.Ints())) ra.Plan() act = rlglue.NewAction(ra.task.Act.Ints.Values(ra.GetAction().Hashcode()), []float64{}, []byte{}) ra.lastAction = discrete.Action(ra.task.Act.Ints.Index(act.Ints())) return }
func (this *Agent) AgentStart(obs rlglue.Observation) (act rlglue.Action) { this.stepsWithPlanner = 0 this.lastState = discrete.State(this.mdp.GetTask().Obs.Ints.Index(obs.Ints())) this.Plan() act = rlglue.NewAction(this.mdp.GetTask().Act.Ints.Values(this.GetAction()), []float64{}, []byte{}) this.lastAction = discrete.Action(this.mdp.GetTask().Act.Ints.Index(act.Ints())) return }
func (this *Env) EnvStep(action rlglue.Action) (obs rlglue.Observation, r float64, t bool) { obs = this.obs var o discrete.Oracle a := discrete.Action(action.Ints()[0]) o, r = this.belief.Next(a) this.belief = o.(*Belief) t = false return }
func (this *FObjTransition) Next(s discrete.State, a discrete.Action) (n discrete.State) { avalues := this.bg.Task.Act.Ints.Values(a.Hashcode()) which, act := avalues[0], avalues[1] sobjs := this.bg.GetObjs(s) nobjs := append([]discrete.State{}, sobjs...) nobjs[which] = this.ObjFDM.Next(sobjs[which], discrete.Action(act)) n = this.bg.GetState(nobjs) return }
func (ra *BebAgent) AgentStep(reward float64, obs rlglue.Observation) (act rlglue.Action) { nextState := discrete.State(ra.task.Obs.Ints.Index(obs.Ints())) learned := ra.rmdp.Observe(ra.lastState, ra.lastAction, nextState, reward) if learned { vi.ValueIteration(ra.qt, ra.rmdp, ra.Cfg.Epsilon) } ra.lastState = nextState act = rlglue.NewAction(ra.task.Act.Ints.Values(ra.qt.Pi(ra.lastState).Hashcode()), []float64{}, []byte{}) ra.lastAction = discrete.Action(ra.task.Act.Ints.Index(act.Ints())) return }
func (this *FObjTransition) Update(s discrete.State, a discrete.Action, n discrete.State) (next TransitionBelief) { nt := new(FObjTransition) *nt = *this avalues := this.bg.Task.Act.Ints.Values(a.Hashcode()) which, act := avalues[0], avalues[1] sobjs := this.bg.GetObjs(s) nobjs := this.bg.GetObjs(n) nt.ObjFDM = this.ObjFDM.Update(sobjs[which], discrete.Action(act), nobjs[which]).(*FDMTransition) next = nt return }
func (this *SysMDP) computeT(s discrete.State, a discrete.Action, n discrete.State) (p float64) { sv := this.Task.Obs.Ints.Values(s.Hashcode()) nv := this.Task.Obs.Ints.Values(n.Hashcode()) p = 1 for i, no := range nv { var fp float64 if a == discrete.Action(i) { fp = 0 } else { fp = this.Cfg.FailBase li := (i + this.Cfg.NumSystems - 1) % this.Cfg.NumSystems ri := (i + 1) % this.Cfg.NumSystems ls := sv[li] == 1 rs := sv[ri] == 1 if li < i { ls = nv[li] == 1 } if !ls { fp += this.Cfg.FailIncr } if !rs { fp += this.Cfg.FailIncr } } if sv[i] == 1 || a == discrete.Action(i) { if no == 0 { p *= fp } else { p *= 1 - fp } } else { if no == 0 { p *= this.Cfg.FailStay } else { p *= 1 - this.Cfg.FailStay } } } return }
func (ra *RmaxFSSSAgent) AgentStep(reward float64, obs rlglue.Observation) (act rlglue.Action) { ra.stepsWithPlanner++ nextState := discrete.State(ra.task.Obs.Ints.Index(obs.Ints())) learned := ra.rmdp.Observe(ra.lastState, ra.lastAction, nextState, reward) if learned { ra.Forget() } ra.lastState = nextState ra.Plan() act = rlglue.NewAction(ra.task.Act.Ints.Values(ra.GetAction().Hashcode()), []float64{}, []byte{}) ra.lastAction = discrete.Action(ra.task.Act.Ints.Index(act.Ints())) return }
func (n *Node) backup() { n.block.Lock() defer n.block.Unlock() //fmt.Printf("+*Node.backup(%v)()\n", n.o.Hashcode()) //defer println("-*Node.backup") n.currentBestAction = 0 //max it n.vlower = n.s.Vmin n.vupper = n.s.Vmin //update the qs for each action offset := uint64(rand.Intn(int(n.s.NumActions))) af, haveActionFilter := n.o.(ActionFilter) for ao := uint64(0); ao < n.s.NumActions; ao++ { a := discrete.Action((ao + offset) % n.s.NumActions) avail := !haveActionFilter || af.ActionAvailable(a) n.qlower[a] = 0 n.qupper[a] = 0 //E[V(s')] part var mostUncertainty float64 for nn, count := range n.branches[a] { weight := count / float64(n.s.Cfg.C) wvupper := weight * nn.vupper wvlower := weight * nn.vlower n.qlower[a] += wvlower n.qupper[a] += wvupper uncertainty := nn.getUncertainty() * count if avail && uncertainty >= mostUncertainty { mostUncertainty, n.currentMostUncertains[a] = uncertainty, nn } } //gamma part n.qlower[a] *= n.s.Gamma n.qupper[a] *= n.s.Gamma //R part n.qlower[a] += n.r[a] n.qupper[a] += n.r[a] if avail { //max operator if n.qlower[a] > n.vlower { n.vlower = n.qlower[a] } if n.qupper[a] > n.vupper { n.vupper = n.qupper[a] n.currentBestAction = a } } } }
func (n *Node) expand() (expanded bool) { n.block.Lock() defer n.block.Unlock() //println("+*Node.expand") //defer println("-*Node.expand") if n.o.Terminal() { return false } expanded = true n.leaf = false af, haveActionFilter := n.o.(ActionFilter) for a := discrete.Action(0); a.Hashcode() < n.s.NumActions; a++ { avail := !haveActionFilter || af.ActionAvailable(a) n.r[a] = 0 var mostUncertainty float64 for i := uint64(0); i < n.s.Cfg.C; i++ { no, r := n.o.Next(a) if no == nil { panic("Next() -> nil") } n.r[a] += r //get the Node for no (next oracle) nn := n.s.GetNode(n.depth+1, no) count := n.branches[a][nn] + 1 n.branches[a][nn] = count uncertainty := count * nn.getUncertainty() if avail && uncertainty >= mostUncertainty { mostUncertainty, n.currentMostUncertains[a] = uncertainty, nn } } n.r[a] /= float64(n.s.Cfg.C) } n.o = nil return }
func (ra *BebAgent) AgentStart(obs rlglue.Observation) (act rlglue.Action) { ra.lastState = discrete.State(ra.task.Obs.Ints.Index(obs.Ints())) act = rlglue.NewAction(ra.task.Act.Ints.Values(ra.qt.Pi(ra.lastState).Hashcode()), []float64{}, []byte{}) ra.lastAction = discrete.Action(ra.task.Act.Ints.Index(act.Ints())) return }