Example #1
func NewOracle(env Env) (this *Oracle) {
	this = new(Oracle)
	this.Env = env
	this.Task, _ = rlglue.ParseTaskSpec(this.Env.EnvInit())
	this.rehash()
	return
}
Example #2
File: opt.go Project: skelterjohn/rlenv
func (this *OptAgent) AgentInit(taskString string) {
	this.task, _ = rlglue.ParseTaskSpec(taskString)
	this.Cfg.NumSystems = len(this.task.Obs.Ints)
	this.mdp = NewSysMDP(this.Cfg)
	this.qt = discrete.NewQTable(this.task.Obs.Ints.Count(), this.task.Act.Ints.Count())
	vi.ValueIteration(this.qt, this.mdp, 0.1)
}
Example #3
File: beb.go Project: skelterjohn/rlalg
func (ra *BebAgent) AgentInit(taskString string) {
	ra.task, _ = rlglue.ParseTaskSpec(taskString)
	if ra.task.DiscountFactor == 1 {
		ra.task.DiscountFactor = 0.99
	}
	ra.rmdp = NewBebMDP(ra.task, ra.Cfg)
	ra.qt = discrete.NewQTable(ra.task.Obs.Ints.Count(), ra.task.Act.Ints.Count())
	ra.Cfg.RFoo = ra.GetRFoo(ra.task)
	ra.rmdp.RFoo = ra.Cfg.RFoo
}
Example #4
func (this *Environment) EnvInit() (taskString string) {
	fstr := "VERSION RL-Glue-3.0 PROBLEMTYPE episodic DISCOUNTFACTOR %f OBSERVATIONS INTS (%d 0 1) ACTIONS INTS (0 %d) REWARDS (-1.0 1.0)"
	taskString = fmt.Sprintf(fstr, this.cfg.DiscountFactor, this.cfg.NumSystems, this.cfg.NumSystems)
	this.task, _ = rlglue.ParseTaskSpec(taskString)
	this.status = make([]bool, this.cfg.NumSystems)
	for i := range this.status {
		this.status[i] = stat.NextBernoulli(this.cfg.StartBoot) == 1
	}
	return
}
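For concreteness, here is what the format string above expands to under an assumed configuration of three systems and a 0.95 discount factor (values chosen purely for illustration). In the RL-Glue 3.0 task-spec syntax, "(3 0 1)" declares three integer observation dimensions, each ranging over 0..1, which is consistent with Example #2 reading the number of systems back via len(task.Obs.Ints).

package main

import "fmt"

func main() {
	// Assumed configuration (NumSystems = 3, DiscountFactor = 0.95), for illustration only.
	fstr := "VERSION RL-Glue-3.0 PROBLEMTYPE episodic DISCOUNTFACTOR %f OBSERVATIONS INTS (%d 0 1) ACTIONS INTS (0 %d) REWARDS (-1.0 1.0)"
	fmt.Printf(fstr+"\n", 0.95, 3, 3)
	// Prints:
	// VERSION RL-Glue-3.0 PROBLEMTYPE episodic DISCOUNTFACTOR 0.950000 OBSERVATIONS INTS (3 0 1) ACTIONS INTS (0 3) REWARDS (-1.0 1.0)
}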
Example #5
func TestDepMatch(t *testing.T) {
	defer nicetrace.Print()
	stat.Seed(seed)
	tstr := "VERSION RL-Glue-3.0 PROBLEMTYPE episodic DISCOUNTFACTOR 1 OBSERVATIONS INTS (0 4) (-1 1) (0 2) ACTIONS INTS (0 1) REWARDS (0 1.0)"
	task, _ := rlglue.ParseTaskSpec(tstr)
	stateRanges := task.Obs.Ints
	actionRanges := task.Act.Ints
	cfg := ConfigDefault()
	cfg.Alpha = alpha
	cfg.M = M
	genDLs := []*DepLearner{NewDepLearner(0, cfg, stateRanges, actionRanges), NewDepLearner(1, cfg, stateRanges, actionRanges), NewDepLearner(2, cfg, stateRanges, actionRanges)}
	genDLs[0].SetParents(ParentSet(0).Insert(0, 1))
	genDLs[1].SetParents(ParentSet(0).Insert(0, 2))
	genDLs[2].SetParents(ParentSet(0).Insert(2))
	patDLs := []*DepLearner{NewDepLearner(0, cfg, stateRanges, actionRanges), NewDepLearner(1, cfg, stateRanges, actionRanges), NewDepLearner(2, cfg, stateRanges, actionRanges)}
	numStates := stateRanges.Count()
	numActions := actionRanges.Count()
	RS := stat.Range(int64(numStates))
	RA := stat.Range(int64(numActions))
	startTime := time.Nanoseconds()
	lastWrongStep := make([]int, len(genDLs))
	for i := 0; i < steps; i++ {
		s := uint64(RS())
		a := uint64(RA())
		nv := make([]int32, len(genDLs))
		for child := 0; child < len(nv); child++ {
			nv[child] = genDLs[child].Next(s, a)
		}
		for child := 0; child < len(nv); child++ {
			genDLs[child] = genDLs[child].Update(s, a, nv[child])
		}
		for child := 0; child < len(nv); child++ {
			patDLs[child] = patDLs[child].Update(s, a, nv[child])
		}
		if i%1 == 0 {
			for child := 0; child < len(nv); child++ {
				patDLs[child].ConsiderRandomFlip()
				if genDLs[child].parents != patDLs[child].parents {
					lastWrongStep[child] = i
				}
			}
		}
	}
	fmt.Println(lastWrongStep)
	endTime := time.Nanoseconds()
	duration := endTime - startTime
	if true {
		fmt.Printf("Ran in %fms\n", float64(duration)/1e6)
	}
	for child := range genDLs {
		if genDLs[child].parents != patDLs[child].parents {
			t.Error(fmt.Sprintf("%d: %v != %v", child, genDLs[child].parents.Slice(), patDLs[child].parents.Slice()))
		}
	}
}
Example #6
func TestBeliefMatch(t *testing.T) {
	defer nicetrace.Print()
	stat.Seed(seed)
	tstr := "VERSION RL-Glue-3.0 PROBLEMTYPE episodic DISCOUNTFACTOR 1 OBSERVATIONS INTS (0 4) (-1 1) (0 2) ACTIONS INTS (0 1) REWARDS (0 1.0)"
	task, _ := rlglue.ParseTaskSpec(tstr)
	cfg := ConfigDefault()
	cfg.Alpha = alpha
	cfg.M = M
	beliefG := NewBelief(cfg, task)
	beliefG.learners[0].SetParents(ParentSet(0).Insert(0, 1))
	beliefG.learners[1].SetParents(ParentSet(0).Insert(0, 2))
	beliefG.learners[2].SetParents(ParentSet(0).Insert(2))
	beliefP := NewBelief(cfg, task)
	numStates := task.Obs.Ints.Count()
	numActions := task.Act.Ints.Count()
	RS := stat.Range(int64(numStates))
	RA := stat.Range(int64(numActions))
	startTime := time.Nanoseconds()
	lastWrongStep := make([]int, len(task.Obs.Ints))
	for i := 0; i < steps; i++ {
		s := uint64(RS())
		a := uint64(RA())
		n := beliefG.Next(s, a)
		beliefG = beliefG.Update(s, a, n).(*Belief)
		beliefP = beliefP.Update(s, a, n).(*Belief)
		if i%1 == 0 {
			beliefP.ConsiderRandomFlipAll()
			for child, learner := range beliefP.learners {
				if beliefG.learners[child].parents != learner.parents {
					lastWrongStep[child] = i
				}
			}
		}
	}
	fmt.Println(lastWrongStep)
	endTime := time.Nanoseconds()
	duration := endTime - startTime
	if true {
		fmt.Printf("Ran in %fms\n", float64(duration)/1e6)
	}
	for child := range beliefP.learners {
		if beliefG.learners[child].parents != beliefP.learners[child].parents {
			t.Error(fmt.Sprintf("%d: %v != %v", child, beliefG.learners[child].parents.Slice(), beliefP.learners[child].parents.Slice()))
		}
	}
}
Example #7
File: mdp.go Project: skelterjohn/rlenv
func NewSysMDP(Cfg Config) (this *SysMDP) {
	this = &SysMDP{Cfg: Cfg}
	fstr := "VERSION RL-Glue-3.0 PROBLEMTYPE episodic DISCOUNTFACTOR %f OBSERVATIONS INTS (%d 0 1) ACTIONS INTS (0 %d) REWARDS (-1.0 1.0)"
	taskString := fmt.Sprintf(fstr, this.Cfg.DiscountFactor, this.Cfg.NumSystems, this.Cfg.NumSystems)
	this.Task, _ = rlglue.ParseTaskSpec(taskString)
	this.maxStates = this.Task.Obs.Ints.Count()
	this.maxActions = this.Task.Act.Ints.Count()
	this.t = make([][]float64, this.maxStates*this.maxActions)
	this.r = make([]float64, this.maxStates*this.maxActions)
	for s := range this.S64() {
		for a := range this.A64() {
			k := s.Hashcode() + a.Hashcode()*this.maxStates
			this.t[k] = make([]float64, this.maxStates)
			this.r[k] = this.computeR(s, a)
			for n := range this.S64() {
				this.t[k][n] = this.computeT(s, a, n)
			}
		}
	}
	return
}
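The index k computed above packs each (state, action) pair into one slot of the flat t and r tables. A minimal sketch of that layout, with a hypothetical helper name not taken from the repo:

// flatIndex is a hypothetical helper illustrating the indexing used above:
// the tables are numStates*numActions long, and k = s + a*numStates keeps
// all states for a fixed action contiguous.
func flatIndex(s, a, numStates uint64) uint64 {
	return s + a*numStates
}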
Example #8
File: mdp.go Project: skelterjohn/rlenv
func NewMDP(cfg Config) (this *MDP) {
	this = new(MDP)
	this.Cfg = cfg
	fstr := "VERSION RL-Glue-3.0 PROBLEMTYPE episodic DISCOUNTFACTOR %f OBSERVATIONS INTS (6 0 1) ACTIONS INTS (0 3) REWARDS (%f %f)"
	rmin := 0.0
	rmax := 1.0
	taskString := fmt.Sprintf(fstr, this.Cfg.DiscountFactor, rmin, rmax)
	this.Task, _ = rlglue.ParseTaskSpec(taskString)
	this.maxStates = this.Task.Obs.Ints.Count()
	this.maxActions = this.Task.Act.Ints.Count()
	this.svs = make([][]int32, this.maxStates)
	for s := uint64(0); s < this.maxStates; s++ {
		this.svs[s] = this.Task.Obs.Ints.Values(s)
	}
	this.t = make([][]float64, this.maxStates*this.maxActions)
	this.r = make([]float64, this.maxStates*this.maxActions)
	for s := range this.S64() {
		for a := range this.A64() {
			k := s.Hashcode() + this.maxStates*a.Hashcode()
			this.r[k] = this.computeR(s, a)
			this.t[k] = make([]float64, this.maxStates)
			sum := 0.0
			for n := range this.S64() {
				this.t[k][n] = this.computeT(s, a, n)
				sum += this.t[k][n]
			}
			if sum > 1 {
				fmt.Fprintf(os.Stderr, "%v %v\n", this.svs[s], a)
				for n := range this.S64() {
					if this.t[k][n] != 0 {
						fmt.Fprintf(os.Stderr, "\t%v : %v\n", this.svs[n], this.t[k][n])
					}
				}
			}
		}
	}
	return
}
Example #9
File: agent.go Project: postfix/rlbayes
func (this *ROARAgent) AgentInit(taskString string) {
	this.task, _ = rlglue.ParseTaskSpec(taskString)
	this.numFeatures = len(this.task.Obs.Doubles)
}
Example #10
File: rmax.go Project: skelterjohn/rlalg
func (ra *RmaxAgent) AgentInit(taskString string) {
	ra.task, _ = rlglue.ParseTaskSpec(taskString)
	ra.rmdp = NewRmaxMDP(ra.task, ra.Cfg.M)
	ra.qt = discrete.NewQTable(ra.task.Obs.Ints.Count(), ra.task.Act.Ints.Count())
}
Example #11
File: bfs3.go Project: skelterjohn/rlalg
func (this *BFS3Agent) AgentInit(taskString string) {
	this.task, _ = rlglue.ParseTaskSpec(taskString)
	this.belief = this.prior(this.task)
	this.ResetPlanner()
}
Example #12
package wumpus

import (
	"fmt"
	"github.com/skelterjohn/rlbayes"
	"go-glue.googlecode.com/hg/rlglue"
	"go-glue.googlecode.com/hg/rltools/discrete"
)

var taskstr = fmt.Sprintf("VERSION RL-Glue-3.0 PROBLEMTYPE episodic DISCOUNTFACTOR %f OBSERVATIONS INTS (2 0 3) (0 3) (16 0 %d) ACTIONS INTS (0 3) REWARDS (%f, 1)", Gamma, len(SquareMapToValue), -.01/(1-Gamma))
var task, _ = rlglue.ParseTaskSpec(taskstr)

type Belief struct {
	Hunter
	term bool
	hash uint64
}

func NewBelief(mb MapBelief) (this *Belief) {
	this = new(Belief)
	this.x = 0
	this.y = 0
	this.dir = 1
	this.belief = mb
	return
}
func (this *Belief) Hashcode() uint64 {
	return this.hash
}
func (this *Belief) LessThan(oi interface{}) bool {
	return this.hash < oi.(*Belief).hash
}
Example #13
func (ra *RmaxFSSSAgent) AgentInit(taskString string) {
	ra.task, _ = rlglue.ParseTaskSpec(taskString)
	ra.rmdp = rmax.NewRmaxMDP(ra.task, ra.Cfg.M)
}
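Every snippet above follows the same pattern: pass the RL-Glue task specification string to rlglue.ParseTaskSpec, then size the agent's tables from the parsed task. Below is a minimal standalone sketch of that pattern with the second return value checked rather than discarded; the import path and field names come from the snippets above, while treating that value as an error is an assumption, since the examples all drop it with _.

package main

import (
	"fmt"

	"go-glue.googlecode.com/hg/rlglue"
)

func main() {
	// Task-spec string modeled on the environment examples above.
	spec := "VERSION RL-Glue-3.0 PROBLEMTYPE episodic DISCOUNTFACTOR 0.95 OBSERVATIONS INTS (3 0 1) ACTIONS INTS (0 3) REWARDS (-1.0 1.0)"
	task, err := rlglue.ParseTaskSpec(spec)
	if err != nil { // assumption: the second return value is an error
		panic(err)
	}
	// Fields the agents above use to size their Q-tables and MDPs.
	fmt.Println("discount:", task.DiscountFactor)
	fmt.Println("states:  ", task.Obs.Ints.Count())
	fmt.Println("actions: ", task.Act.Ints.Count())
}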