// NewOracle wraps the given environment, parses the task spec returned by its
// EnvInit, and precomputes the oracle's internal tables via rehash.
func NewOracle(env Env) (this *Oracle) {
	this = new(Oracle)
	this.Env = env
	this.Task, _ = rlglue.ParseTaskSpec(this.Env.EnvInit())
	this.rehash()
	return
}
// AgentInit parses the task spec, sizes the config by the number of integer
// observation variables, builds the exact system MDP, and solves it with value
// iteration (tolerance 0.1) into a fresh Q-table.
func (this *OptAgent) AgentInit(taskString string) {
	this.task, _ = rlglue.ParseTaskSpec(taskString)
	this.Cfg.NumSystems = len(this.task.Obs.Ints)
	this.mdp = NewSysMDP(this.Cfg)
	this.qt = discrete.NewQTable(this.task.Obs.Ints.Count(), this.task.Act.Ints.Count())
	vi.ValueIteration(this.qt, this.mdp, 0.1)
}
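// Standalone sketch of tabular value iteration with a stopping tolerance,
// analogous in spirit to the vi.ValueIteration(qt, mdp, 0.1) call above but
// not the vi package's implementation; the tiny 2-state, 2-action MDP and the
// gamma value are made up for illustration.
package main

import (
	"fmt"
	"math"
)

func main() {
	const (
		nS, nA  = 2, 2
		gamma   = 0.95
		epsilon = 0.1 // stop when the largest value change falls below this
	)
	// t[s][a][n] = P(next = n | s, a); r[s][a] = expected immediate reward.
	t := [nS][nA][nS]float64{
		{{0.9, 0.1}, {0.2, 0.8}},
		{{0.5, 0.5}, {0.0, 1.0}},
	}
	r := [nS][nA]float64{{0, 1}, {0, 2}}

	v := [nS]float64{}
	for {
		delta := 0.0
		for s := 0; s < nS; s++ {
			best := math.Inf(-1)
			for a := 0; a < nA; a++ {
				q := r[s][a]
				for n := 0; n < nS; n++ {
					q += gamma * t[s][a][n] * v[n]
				}
				if q > best {
					best = q
				}
			}
			delta = math.Max(delta, math.Abs(best-v[s]))
			v[s] = best
		}
		if delta < epsilon {
			break
		}
	}
	fmt.Println(v) // state values after convergence
}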
// AgentInit parses the task spec, clamps an undiscounted task to a 0.99
// discount factor, builds the BEB MDP and Q-table, and derives Cfg.RFoo from
// the task before handing it to the MDP.
func (ra *BebAgent) AgentInit(taskString string) {
	ra.task, _ = rlglue.ParseTaskSpec(taskString)
	if ra.task.DiscountFactor == 1 {
		ra.task.DiscountFactor = 0.99
	}
	ra.rmdp = NewBebMDP(ra.task, ra.Cfg)
	ra.qt = discrete.NewQTable(ra.task.Obs.Ints.Count(), ra.task.Act.Ints.Count())
	ra.Cfg.RFoo = ra.GetRFoo(ra.task)
	ra.rmdp.RFoo = ra.Cfg.RFoo
}
// EnvInit builds the RL-Glue task spec string for this environment, parses it
// back for internal use, and samples each system's initial status from a
// Bernoulli(StartBoot) draw.
func (this *Environment) EnvInit() (taskString string) {
	fstr := "VERSION RL-Glue-3.0 PROBLEMTYPE episodic DISCOUNTFACTOR %f OBSERVATIONS INTS (%d 0 1) ACTIONS INTS (0 %d) REWARDS (-1.0 1.0)"
	taskString = fmt.Sprintf(fstr, this.cfg.DiscountFactor, this.cfg.NumSystems, this.cfg.NumSystems)
	this.task, _ = rlglue.ParseTaskSpec(taskString)
	this.status = make([]bool, this.cfg.NumSystems)
	for i := range this.status {
		this.status[i] = stat.NextBernoulli(this.cfg.StartBoot) == 1
	}
	return
}
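// Standalone sketch (not part of the package) of the task string the format
// above produces; NumSystems = 3 and DiscountFactor = 0.95 are arbitrary
// illustration values. In the RL-Glue 3.0 spec, "(3 0 1)" declares three
// integer observation variables, each ranging over 0..1.
package main

import "fmt"

func main() {
	fstr := "VERSION RL-Glue-3.0 PROBLEMTYPE episodic DISCOUNTFACTOR %f OBSERVATIONS INTS (%d 0 1) ACTIONS INTS (0 %d) REWARDS (-1.0 1.0)"
	fmt.Printf(fstr+"\n", 0.95, 3, 3)
	// VERSION RL-Glue-3.0 PROBLEMTYPE episodic DISCOUNTFACTOR 0.950000 OBSERVATIONS INTS (3 0 1) ACTIONS INTS (0 3) REWARDS (-1.0 1.0)
}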
// TestDepMatch draws transitions from learners with hand-set parent structures
// and checks that independently trained DepLearners recover the same parent
// sets by considering random structure flips.
func TestDepMatch(t *testing.T) {
	defer nicetrace.Print()
	stat.Seed(seed)
	tstr := "VERSION RL-Glue-3.0 PROBLEMTYPE episodic DISCOUNTFACTOR 1 OBSERVATIONS INTS (0 4) (-1 1) (0 2) ACTIONS INTS (0 1) REWARDS (0 1.0)"
	task, _ := rlglue.ParseTaskSpec(tstr)
	stateRanges := task.Obs.Ints
	actionRanges := task.Act.Ints
	cfg := ConfigDefault()
	cfg.Alpha = alpha
	cfg.M = M
	// Generating learners with known parent sets.
	genDLs := []*DepLearner{
		NewDepLearner(0, cfg, stateRanges, actionRanges),
		NewDepLearner(1, cfg, stateRanges, actionRanges),
		NewDepLearner(2, cfg, stateRanges, actionRanges),
	}
	genDLs[0].SetParents(ParentSet(0).Insert(0, 1))
	genDLs[1].SetParents(ParentSet(0).Insert(0, 2))
	genDLs[2].SetParents(ParentSet(0).Insert(2))
	// Pattern learners start from the default structure and must recover it.
	patDLs := []*DepLearner{
		NewDepLearner(0, cfg, stateRanges, actionRanges),
		NewDepLearner(1, cfg, stateRanges, actionRanges),
		NewDepLearner(2, cfg, stateRanges, actionRanges),
	}
	numStates := stateRanges.Count()
	numActions := actionRanges.Count()
	RS := stat.Range(int64(numStates))
	RA := stat.Range(int64(numActions))
	startTime := time.Nanoseconds()
	lastWrongStep := make([]int, len(genDLs))
	for i := 0; i < steps; i++ {
		s := uint64(RS())
		a := uint64(RA())
		nv := make([]int32, len(genDLs))
		for child := 0; child < len(nv); child++ {
			nv[child] = genDLs[child].Next(s, a)
		}
		for child := 0; child < len(nv); child++ {
			genDLs[child] = genDLs[child].Update(s, a, nv[child])
		}
		for child := 0; child < len(nv); child++ {
			patDLs[child] = patDLs[child].Update(s, a, nv[child])
		}
		if i%1 == 0 { // consider a flip every step; raise the modulus to flip less often
			for child := 0; child < len(nv); child++ {
				patDLs[child].ConsiderRandomFlip()
				if genDLs[child].parents != patDLs[child].parents {
					lastWrongStep[child] = i
				}
			}
		}
	}
	fmt.Println(lastWrongStep)
	endTime := time.Nanoseconds()
	duration := endTime - startTime
	if true {
		fmt.Printf("Ran in %fms\n", float64(duration)/1e6)
	}
	for child := range genDLs {
		if genDLs[child].parents != patDLs[child].parents {
			t.Error(fmt.Sprintf("%d: %v != %v", child, genDLs[child].parents.Slice(), patDLs[child].parents.Slice()))
		}
	}
}
// TestBeliefMatch repeats the dependency-recovery check at the Belief level:
// a generating Belief with known parent structure feeds transitions to a
// pattern Belief that must recover the same structure via random flips.
func TestBeliefMatch(t *testing.T) {
	defer nicetrace.Print()
	stat.Seed(seed)
	tstr := "VERSION RL-Glue-3.0 PROBLEMTYPE episodic DISCOUNTFACTOR 1 OBSERVATIONS INTS (0 4) (-1 1) (0 2) ACTIONS INTS (0 1) REWARDS (0 1.0)"
	task, _ := rlglue.ParseTaskSpec(tstr)
	cfg := ConfigDefault()
	cfg.Alpha = alpha
	cfg.M = M
	beliefG := NewBelief(cfg, task)
	beliefG.learners[0].SetParents(ParentSet(0).Insert(0, 1))
	beliefG.learners[1].SetParents(ParentSet(0).Insert(0, 2))
	beliefG.learners[2].SetParents(ParentSet(0).Insert(2))
	beliefP := NewBelief(cfg, task)
	numStates := task.Obs.Ints.Count()
	numActions := task.Act.Ints.Count()
	RS := stat.Range(int64(numStates))
	RA := stat.Range(int64(numActions))
	startTime := time.Nanoseconds()
	lastWrongStep := make([]int, len(task.Obs.Ints))
	for i := 0; i < steps; i++ {
		s := uint64(RS())
		a := uint64(RA())
		n := beliefG.Next(s, a)
		beliefG = beliefG.Update(s, a, n).(*Belief)
		beliefP = beliefP.Update(s, a, n).(*Belief)
		if i%1 == 0 { // consider flips every step
			beliefP.ConsiderRandomFlipAll()
			for child, learner := range beliefP.learners {
				if beliefG.learners[child].parents != learner.parents {
					lastWrongStep[child] = i
				}
			}
		}
	}
	fmt.Println(lastWrongStep)
	endTime := time.Nanoseconds()
	duration := endTime - startTime
	if true {
		fmt.Printf("Ran in %fms\n", float64(duration)/1e6)
	}
	for child := range beliefP.learners {
		if beliefG.learners[child].parents != beliefP.learners[child].parents {
			t.Error(fmt.Sprintf("%d: %v != %v", child, beliefG.learners[child].parents.Slice(), beliefP.learners[child].parents.Slice()))
		}
	}
}
// NewSysMDP enumerates every (state, action) pair of the systems task and
// fills in the tabular reward and transition arrays, indexed by
// k = s + a*maxStates.
func NewSysMDP(Cfg Config) (this *SysMDP) {
	this = &SysMDP{Cfg: Cfg}
	fstr := "VERSION RL-Glue-3.0 PROBLEMTYPE episodic DISCOUNTFACTOR %f OBSERVATIONS INTS (%d 0 1) ACTIONS INTS (0 %d) REWARDS (-1.0 1.0)"
	taskString := fmt.Sprintf(fstr, this.Cfg.DiscountFactor, this.Cfg.NumSystems, this.Cfg.NumSystems)
	this.Task, _ = rlglue.ParseTaskSpec(taskString)
	this.maxStates = this.Task.Obs.Ints.Count()
	this.maxActions = this.Task.Act.Ints.Count()
	this.t = make([][]float64, this.maxStates*this.maxActions)
	this.r = make([]float64, this.maxStates*this.maxActions)
	for s := range this.S64() {
		for a := range this.A64() {
			k := s.Hashcode() + a.Hashcode()*this.maxStates
			this.t[k] = make([]float64, this.maxStates)
			this.r[k] = this.computeR(s, a)
			for n := range this.S64() {
				this.t[k][n] = this.computeT(s, a, n)
			}
		}
	}
	return
}
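// Standalone sketch (illustrative values, not from the package): the flat row
// index used for the t and r tables above packs a (state, action) pair as
// k = s + a*maxStates, so s = k % maxStates and a = k / maxStates.
package main

import "fmt"

func main() {
	const maxStates = 8 // e.g. the 2^3 joint states of three binary systems
	s, a := uint64(5), uint64(2)
	k := s + a*maxStates
	fmt.Println(k, k%maxStates, k/maxStates) // 21 5 2
}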
// NewMDP builds the tabular reward and transition arrays for the fixed
// 6-variable task, caching each state's value vector in svs, and prints a
// diagnostic to stderr whenever a transition row sums above 1.
func NewMDP(cfg Config) (this *MDP) {
	this = new(MDP)
	this.Cfg = cfg
	fstr := "VERSION RL-Glue-3.0 PROBLEMTYPE episodic DISCOUNTFACTOR %f OBSERVATIONS INTS (6 0 1) ACTIONS INTS (0 3) REWARDS (%f %f)"
	rmin := 0.0
	rmax := 1.0
	taskString := fmt.Sprintf(fstr, this.Cfg.DiscountFactor, rmin, rmax)
	this.Task, _ = rlglue.ParseTaskSpec(taskString)
	this.maxStates = this.Task.Obs.Ints.Count()
	this.maxActions = this.Task.Act.Ints.Count()
	this.svs = make([][]int32, this.maxStates)
	for s := uint64(0); s < this.maxStates; s++ {
		this.svs[s] = this.Task.Obs.Ints.Values(s)
	}
	this.t = make([][]float64, this.maxStates*this.maxActions)
	this.r = make([]float64, this.maxStates*this.maxActions)
	for s := range this.S64() {
		for a := range this.A64() {
			k := s.Hashcode() + this.maxStates*a.Hashcode()
			this.r[k] = this.computeR(s, a)
			this.t[k] = make([]float64, this.maxStates)
			sum := 0.0
			for n := range this.S64() {
				this.t[k][n] = this.computeT(s, a, n)
				sum += this.t[k][n]
			}
			if sum > 1 {
				fmt.Fprintf(os.Stderr, "%v %v\n", this.svs[s], a)
				for n := range this.S64() {
					if this.t[k][n] != 0 {
						fmt.Fprintf(os.Stderr, "\t%v : %v\n", this.svs[n], this.t[k][n])
					}
				}
			}
		}
	}
	return
}
func (this *ROARAgent) AgentInit(taskString string) {
	this.task, _ = rlglue.ParseTaskSpec(taskString)
	this.numFeatures = len(this.task.Obs.Doubles)
}
// AgentInit parses the task spec, builds the Rmax MDP with known-state
// threshold M, and allocates the Q-table.
func (ra *RmaxAgent) AgentInit(taskString string) {
	ra.task, _ = rlglue.ParseTaskSpec(taskString)
	ra.rmdp = NewRmaxMDP(ra.task, ra.Cfg.M)
	ra.qt = discrete.NewQTable(ra.task.Obs.Ints.Count(), ra.task.Act.Ints.Count())
}
// AgentInit parses the task spec, initializes the belief from the agent's
// prior, and resets the planner.
func (this *BFS3Agent) AgentInit(taskString string) {
	this.task, _ = rlglue.ParseTaskSpec(taskString)
	this.belief = this.prior(this.task)
	this.ResetPlanner()
}
package wumpus

import (
	"fmt"
	"github.com/skelterjohn/rlbayes"
	"go-glue.googlecode.com/hg/rlglue"
	"go-glue.googlecode.com/hg/rltools/discrete"
)

// Package-level task spec for the wumpus domain: two position coordinates and
// a facing direction in 0..3, plus 16 square values, with the reward floor set
// to -.01/(1-Gamma).
var taskstr = fmt.Sprintf("VERSION RL-Glue-3.0 PROBLEMTYPE episodic DISCOUNTFACTOR %f OBSERVATIONS INTS (2 0 3) (0 3) (16 0 %d) ACTIONS INTS (0 3) REWARDS (%f, 1)", Gamma, len(SquareMapToValue), -.01/(1-Gamma))
var task, _ = rlglue.ParseTaskSpec(taskstr)

// Belief embeds the Hunter state along with a termination flag and a cached hash.
type Belief struct {
	Hunter
	term bool
	hash uint64
}

// NewBelief starts the hunter at the origin facing direction 1 with the given
// map belief.
func NewBelief(mb MapBelief) (this *Belief) {
	this = new(Belief)
	this.x = 0
	this.y = 0
	this.dir = 1
	this.belief = mb
	return
}

func (this *Belief) Hashcode() uint64 {
	return this.hash
}

func (this *Belief) LessThan(oi interface{}) bool {
	return this.hash < oi.(*Belief).hash
}
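// Standalone arithmetic check (the Gamma value is assumed for illustration):
// the lower reward bound -.01/(1-Gamma) in taskstr above equals the infinite
// discounted sum of a -0.01 per-step cost, i.e. the sum over t of
// Gamma^t * (-0.01). For Gamma = 0.95 that comes to -0.01/0.05 ≈ -0.2.
package main

import "fmt"

func main() {
	gamma := 0.95
	fmt.Println(-.01 / (1 - gamma)) // ≈ -0.2
}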
func (ra *RmaxFSSSAgent) AgentInit(taskString string) {
	ra.task, _ = rlglue.ParseTaskSpec(taskString)
	ra.rmdp = rmax.NewRmaxMDP(ra.task, ra.Cfg.M)
}