Example #1
0
// getAction chooses the next action for the agent's current belief.
//
// If Cfg.ReplanEachStep is set the planner is reset first. When no planner is
// present (fs3 is nil) the action index is taken from stat.NextRange over the
// number of actions in the task. Otherwise up to Cfg.MaxTrajectories
// trajectories of depth Cfg.Depth are run from the node for the current
// step and belief; when Cfg.Budget is non-zero the loop stops early once the
// summed return values of RunTrajectory exceed the budget. The planner's
// chosen action for that node is returned, and its Q values are written to
// standard error.
func (this *BFS3Agent) getAction() (index discrete.Action) {
	// fs3 may be nil (see the fallback below), so the Dump flag must only be
	// read behind a nil check; reading it unconditionally would panic before
	// the fallback could ever run.
	if this.fs3 != nil && this.fs3.Dump {
		println("getAction")
	}
	if this.Cfg.ReplanEachStep {
		this.ResetPlanner()
	}
	if this.fs3 == nil {
		// No planner: fall back to stat.NextRange over the action count.
		index = discrete.Action(stat.NextRange(int64(this.task.Act.Ints.Count())))
		return
	}
	node := this.fs3.GetNode(this.stepsWithPlanner, this.belief)
	var expanded uint64
	for i := uint64(0); i < this.Cfg.MaxTrajectories; i++ {
		expanded += this.fs3.RunTrajectory(node, this.Cfg.Depth)
		// A Budget of zero means "no budget"; only MaxTrajectories bounds the loop.
		if this.Cfg.Budget != 0 && expanded > this.Cfg.Budget {
			break
		}
	}
	if this.Dump {
		// Turn on planner-level dumping, print the root, and run one extra
		// trajectory with dumping enabled. The flag is left set afterwards.
		this.fs3.Dump = true
		fmt.Printf("root:\n%v\n\n", node)
		this.fs3.RunTrajectory(node, this.Cfg.Depth)
	}
	index = discrete.Action(this.fs3.GetAction(node))
	fmt.Fprintf(os.Stderr, "%v\n", this.fs3.GetQs(node))
	if !this.Cfg.FS3.Shallow {
		// Clear the level that was just used and move on to the next step.
		this.fs3.ClearLevel(this.stepsWithPlanner)
		this.stepsWithPlanner++
	}
	// Pauses for one second (1e9 ns) on every call.
	// NOTE(review): looks like a debugging aid — confirm it is still wanted.
	time.Sleep(1e9)
	return
}
Example #2
0
// GetAction returns the action for the current planning step.
//
// With a planner present, the action is the planner's choice for the node at
// (stepsWithPlanner, mdpo), and that level is cleared afterwards. Without a
// planner (ra.s is nil), the index comes from stat.NextRange over the number
// of actions in the task.
func (ra *RmaxFSSSAgent) GetAction() (action discrete.Action) {
	if ra.s != nil {
		root := ra.s.GetNode(ra.stepsWithPlanner, ra.mdpo)
		action = discrete.Action(ra.s.GetAction(root))
		ra.s.ClearLevel(ra.stepsWithPlanner)
		return action
	}
	numActions := int64(ra.task.Act.Ints.Count())
	return discrete.Action(stat.NextRange(numActions))
}
Example #3
0
// AgentStart handles the first observation of an episode: it resets the
// planner step counter, records the observed state, plans, and returns the
// action selected by GetAction. The returned action is also remembered as
// lastAction.
func (ra *RmaxFSSSAgent) AgentStart(obs rlglue.Observation) (act rlglue.Action) {
	ra.stepsWithPlanner = 0

	stateIndex := ra.task.Obs.Ints.Index(obs.Ints())
	ra.lastState = discrete.State(stateIndex)

	ra.Plan()

	// Expand the chosen action's hashcode into the task's integer action values.
	actionValues := ra.task.Act.Ints.Values(ra.GetAction().Hashcode())
	act = rlglue.NewAction(actionValues, []float64{}, []byte{})

	actionIndex := ra.task.Act.Ints.Index(act.Ints())
	ra.lastAction = discrete.Action(actionIndex)
	return act
}
Example #4
0
// AgentStart handles the first observation of an episode: it resets the
// planner step counter, records the observed state, plans, and returns the
// action selected by GetAction. The returned action is also remembered as
// lastAction.
func (this *Agent) AgentStart(obs rlglue.Observation) (act rlglue.Action) {
	this.stepsWithPlanner = 0

	// The task is re-fetched from the MDP before each use rather than cached.
	task := this.mdp.GetTask()
	this.lastState = discrete.State(task.Obs.Ints.Index(obs.Ints()))

	this.Plan()

	task = this.mdp.GetTask()
	actionValues := task.Act.Ints.Values(this.GetAction())
	act = rlglue.NewAction(actionValues, []float64{}, []byte{})

	task = this.mdp.GetTask()
	this.lastAction = discrete.Action(task.Act.Ints.Index(act.Ints()))
	return act
}
Example #5
0
// EnvStep advances the environment by one action.
//
// The first integer of action is used as the discrete action. The
// environment's belief is replaced by the one returned from belief.Next
// (which must be a *Belief, otherwise the type assertion panics), and the
// reward from that call is returned. The observation returned is the stored
// this.obs, and the step is never reported as terminal.
func (this *Env) EnvStep(action rlglue.Action) (obs rlglue.Observation, r float64, t bool) {
	obs = this.obs

	chosen := discrete.Action(action.Ints()[0])

	var successor discrete.Oracle
	successor, r = this.belief.Next(chosen)
	this.belief = successor.(*Belief)

	return obs, r, false
}
Example #6
0
// Next returns the state reached from s under action a.
//
// The action decodes into two values: the index of the object to act on and
// the per-object action. Only that object is advanced, using ObjFDM.Next;
// every other object keeps its value from s.
func (this *FObjTransition) Next(s discrete.State, a discrete.Action) (n discrete.State) {
	decoded := this.bg.Task.Act.Ints.Values(a.Hashcode())
	target, objAction := decoded[0], decoded[1]

	current := this.bg.GetObjs(s)

	// Work on a private copy so the slice returned by GetObjs is left untouched.
	updated := make([]discrete.State, len(current))
	copy(updated, current)
	updated[target] = this.ObjFDM.Next(current[target], discrete.Action(objAction))

	return this.bg.GetState(updated)
}
Example #7
0
// AgentStep processes one environment step: it feeds the observed transition
// and reward to the model, re-runs value iteration when Observe returns true,
// and returns the action given by the Q-table's policy for the new state. The
// new state and returned action are remembered for the next step.
func (ra *BebAgent) AgentStep(reward float64, obs rlglue.Observation) (act rlglue.Action) {
	observed := discrete.State(ra.task.Obs.Ints.Index(obs.Ints()))

	if ra.rmdp.Observe(ra.lastState, ra.lastAction, observed, reward) {
		vi.ValueIteration(ra.qt, ra.rmdp, ra.Cfg.Epsilon)
	}
	ra.lastState = observed

	policyAction := ra.qt.Pi(ra.lastState)
	actionValues := ra.task.Act.Ints.Values(policyAction.Hashcode())
	act = rlglue.NewAction(actionValues, []float64{}, []byte{})

	ra.lastAction = discrete.Action(ra.task.Act.Ints.Index(act.Ints()))
	return act
}
Example #8
0
// Update returns a new transition belief that incorporates the observed
// transition (s, a, n).
//
// The result is a shallow copy of the receiver whose ObjFDM has been replaced
// by ObjFDM.Update applied to the acted-on object's value before and after
// the step. The action decodes into the object index and the per-object
// action. ObjFDM.Update must return a *FDMTransition, otherwise the type
// assertion panics.
func (this *FObjTransition) Update(s discrete.State, a discrete.Action, n discrete.State) (next TransitionBelief) {
	clone := *this
	result := &clone

	decoded := this.bg.Task.Act.Ints.Values(a.Hashcode())
	target, objAction := decoded[0], decoded[1]

	before := this.bg.GetObjs(s)
	after := this.bg.GetObjs(n)

	updatedFDM := this.ObjFDM.Update(before[target], discrete.Action(objAction), after[target])
	result.ObjFDM = updatedFDM.(*FDMTransition)

	return result
}
Example #9
0
// computeT returns the probability of moving from state s to state n under
// action a, as a product of one factor per component of the state.
//
// For component i:
//   - If a targets i, its failure probability is 0.
//   - Otherwise the failure probability is Cfg.FailBase plus Cfg.FailIncr for
//     each ring neighbour (left and right, modulo Cfg.NumSystems) whose value
//     is not 1. The left neighbour is read from n when its index is below i,
//     and from s otherwise; the right neighbour is always read from s.
//   - If the component is 1 in s, or is targeted by a, that failure
//     probability decides the factor. Otherwise Cfg.FailStay is used.
//
// The factor is the chosen probability when the component is 0 in n, and its
// complement otherwise.
// NOTE(review): presumably 1 means "running" and 0 "failed" — confirm.
func (this *SysMDP) computeT(s discrete.State, a discrete.Action, n discrete.State) (p float64) {
	cur := this.Task.Obs.Ints.Values(s.Hashcode())
	nxt := this.Task.Obs.Ints.Values(n.Hashcode())

	p = 1
	for i, after := range nxt {
		targeted := a == discrete.Action(i)

		var failProb float64
		if !targeted {
			left := (i + this.Cfg.NumSystems - 1) % this.Cfg.NumSystems
			right := (i + 1) % this.Cfg.NumSystems

			leftUp := cur[left] == 1
			rightUp := cur[right] == 1
			if left < i {
				// The left neighbour was already handled in this pass, so its
				// value in n is used instead of its value in s.
				leftUp = nxt[left] == 1
			}

			failProb = this.Cfg.FailBase
			if !leftUp {
				failProb += this.Cfg.FailIncr
			}
			if !rightUp {
				failProb += this.Cfg.FailIncr
			}
		}

		// Probability that this component is 0 in n.
		zeroProb := this.Cfg.FailStay
		if cur[i] == 1 || targeted {
			zeroProb = failProb
		}

		if after == 0 {
			p *= zeroProb
		} else {
			p *= 1 - zeroProb
		}
	}
	return p
}
Example #10
0
// AgentStep processes one environment step: it advances the planner step
// counter, feeds the observed transition and reward to the model, calls
// Forget when Observe returns true, re-plans, and returns the action selected
// by GetAction. The new state and returned action are remembered for the
// next step.
func (ra *RmaxFSSSAgent) AgentStep(reward float64, obs rlglue.Observation) (act rlglue.Action) {
	ra.stepsWithPlanner++

	observed := discrete.State(ra.task.Obs.Ints.Index(obs.Ints()))
	if ra.rmdp.Observe(ra.lastState, ra.lastAction, observed, reward) {
		ra.Forget()
	}
	ra.lastState = observed

	ra.Plan()

	actionValues := ra.task.Act.Ints.Values(ra.GetAction().Hashcode())
	act = rlglue.NewAction(actionValues, []float64{}, []byte{})

	ra.lastAction = discrete.Action(ra.task.Act.Ints.Index(act.Ints()))
	return act
}
Example #11
0
// backup recomputes this node's bounds from its children while holding
// n.block.
//
// For every action a, qlower[a] and qupper[a] are rebuilt as
//
//	r[a] + Gamma * sum over children (count / Cfg.C) * child bound
//
// and currentMostUncertains[a] is set to the child with the largest
// getUncertainty() * count. vlower and vupper become the maximum of the
// respective Q bounds, starting from Vmin. currentBestAction is the action
// with the highest upper bound, or 0 if none exceeds Vmin. Actions rejected
// by an ActionFilter still get their Q bounds updated, but do not contribute
// to the value bounds, the best action, or the most-uncertain child.
//
// Actions are visited starting from a random offset.
// NOTE(review): presumably the random start is meant to vary the visiting
// order between calls — confirm the intent.
func (n *Node) backup() {
	n.block.Lock()
	defer n.block.Unlock()

	n.currentBestAction = 0
	// Seed both bounds with the minimum value so the max below can only raise them.
	n.vlower, n.vupper = n.s.Vmin, n.s.Vmin

	start := uint64(rand.Intn(int(n.s.NumActions)))

	filter, filtered := n.o.(ActionFilter)

	for step := uint64(0); step < n.s.NumActions; step++ {
		a := discrete.Action((step + start) % n.s.NumActions)

		usable := true
		if filtered {
			usable = filter.ActionAvailable(a)
		}

		n.qlower[a] = 0
		n.qupper[a] = 0

		// Expected child value, weighted by how often each child was sampled.
		var peak float64
		for child, visits := range n.branches[a] {
			share := visits / float64(n.s.Cfg.C)
			upperPart := share * child.vupper
			lowerPart := share * child.vlower
			n.qlower[a] += lowerPart
			n.qupper[a] += upperPart

			spread := child.getUncertainty() * visits
			if usable && spread >= peak {
				peak = spread
				n.currentMostUncertains[a] = child
			}
		}

		// Discount the expectation.
		n.qlower[a] *= n.s.Gamma
		n.qupper[a] *= n.s.Gamma

		// Add the immediate reward.
		n.qlower[a] += n.r[a]
		n.qupper[a] += n.r[a]

		if !usable {
			continue
		}

		// Max over usable actions.
		if n.qlower[a] > n.vlower {
			n.vlower = n.qlower[a]
		}
		if n.qupper[a] > n.vupper {
			n.vupper = n.qupper[a]
			n.currentBestAction = a
		}
	}
}
Example #12
0
// expand samples this node's successors and reports whether it did so, while
// holding n.block.
//
// A node whose oracle is terminal is left untouched and false is returned.
// Otherwise, for every action, the oracle is sampled Cfg.C times. Each
// sampled successor is mapped to its node at depth+1 and counted in
// branches[a]. r[a] becomes the mean sampled reward. currentMostUncertains[a]
// is set to the child with the largest count * getUncertainty(), but only for
// actions an ActionFilter (if any) reports as available. The node is then
// marked as a non-leaf and its oracle reference is dropped.
//
// Panics if the oracle's Next returns a nil successor.
func (n *Node) expand() (expanded bool) {
	n.block.Lock()
	defer n.block.Unlock()

	if n.o.Terminal() {
		return false
	}
	n.leaf = false

	filter, filtered := n.o.(ActionFilter)

	for a := discrete.Action(0); a.Hashcode() < n.s.NumActions; a++ {
		usable := true
		if filtered {
			usable = filter.ActionAvailable(a)
		}

		n.r[a] = 0
		var peak float64
		for sample := uint64(0); sample < n.s.Cfg.C; sample++ {
			successor, reward := n.o.Next(a)
			if successor == nil {
				panic("Next() -> nil")
			}
			n.r[a] += reward

			// Look up (or create) the node for the sampled successor and bump its count.
			child := n.s.GetNode(n.depth+1, successor)
			visits := n.branches[a][child] + 1
			n.branches[a][child] = visits

			spread := visits * child.getUncertainty()
			if usable && spread >= peak {
				peak = spread
				n.currentMostUncertains[a] = child
			}
		}
		// Turn the reward sum into a mean over the samples.
		n.r[a] /= float64(n.s.Cfg.C)
	}

	// The oracle is not kept once the node has been expanded.
	n.o = nil

	return true
}
Example #13
0
// AgentStart handles the first observation of an episode: it records the
// observed state and returns the action given by the Q-table's policy for
// that state. The returned action is also remembered as lastAction.
func (ra *BebAgent) AgentStart(obs rlglue.Observation) (act rlglue.Action) {
	stateIndex := ra.task.Obs.Ints.Index(obs.Ints())
	ra.lastState = discrete.State(stateIndex)

	policyAction := ra.qt.Pi(ra.lastState)
	actionValues := ra.task.Act.Ints.Values(policyAction.Hashcode())
	act = rlglue.NewAction(actionValues, []float64{}, []byte{})

	ra.lastAction = discrete.Action(ra.task.Act.Ints.Index(act.Ints()))
	return act
}