Ejemplo n.º 1
0
// EnvStep advances every system by one step and returns the new observation.
//
// The first integer of action is the index of the system to reboot. For each
// system the probability of failing is:
//   - 0 when it is the one being rebooted;
//   - cfg.FailBase plus cfg.FailIncr for each ring neighbour (index-1 and
//     index+1, modulo cfg.NumSystems) that is currently down, when it is up;
//   - cfg.FailStay when it is already down and not being rebooted.
//
// The reward r is the number of systems that are up after the update, minus
// one when the reboot index is below cfg.NumSystems. t is never set and is
// therefore always false.
func (this *Environment) EnvStep(action rlglue.Action) (obs rlglue.Observation, r float64, t bool) {
	target := int(action.Ints()[0])
	n := this.cfg.NumSystems

	// status is updated in place, so a system's neighbours with a lower index
	// have already been resampled by the time they are inspected.
	for idx := range this.status {
		rebooted := idx == target

		var failProb float64
		if !rebooted {
			failProb = this.cfg.FailBase
			left := (idx + n - 1) % n
			right := (idx + 1) % n
			if !this.status[left] {
				failProb += this.cfg.FailIncr
			}
			if !this.status[right] {
				failProb += this.cfg.FailIncr
			}
		}

		// A system that is down and not rebooted uses the fixed FailStay
		// probability instead of the neighbour-dependent one.
		var upProb float64
		if rebooted || this.status[idx] {
			upProb = 1 - failProb
		} else {
			upProb = 1 - this.cfg.FailStay
		}

		this.status[idx] = stat.NextUniform() < upProb
		if this.status[idx] {
			r++
		}
	}

	// Rebooting an actual system costs one unit of reward.
	if target < n {
		r--
	}

	obs = this.ConstructObs()
	return obs, r, t
}
Ejemplo n.º 2
0
// EnvStep feeds the first integer of action, converted to a discrete.Action,
// to the current belief's Next method, stores the returned oracle as the new
// belief, and returns the reward Next produced.
//
// The observation returned is this.obs as it was before Next ran, and the
// step is never reported as terminal. The oracle returned by Next must be a
// *Belief; otherwise the type assertion panics.
func (this *Env) EnvStep(action rlglue.Action) (obs rlglue.Observation, r float64, t bool) {
	obs = this.obs

	chosen := discrete.Action(action.Ints()[0])

	var next discrete.Oracle
	var reward float64
	next, reward = this.belief.Next(chosen)
	this.belief = next.(*Belief)

	return obs, reward, false
}
Ejemplo n.º 3
0
// EnvStep moves the agent one cell and returns its new position as the
// observation.
//
// t reports whether the position equalled the goal before this move; r is 0
// in that case and -1 otherwise. The requested direction is the first integer
// of action. A value u is drawn from rand.Float64: when u < .1 the direction
// is advanced by one, when .1 <= u < .2 it is advanced by three, and the
// result is reduced modulo 4. Direction 0 increments Y, 1 increments X,
// 2 decrements Y and 3 decrements X. The new position is clamped to
// [0, Width-1] x [0, Height-1] before it is stored.
func (ge *Env) EnvStep(action rlglue.Action) (obs rlglue.Observation, r float64, t bool) {
	t = ge.Pos.Equals(ge.Goal)
	if t {
		r = 0
	} else {
		r = -1
	}

	heading := action.Ints()[0]
	// Occasionally slip to a different heading than the one requested.
	switch roll := rand.Float64(); {
	case roll < .1:
		heading++
	case roll < .2:
		heading += 3
	}
	heading %= 4

	next := ge.Pos
	switch heading {
	case 0:
		next.Y++
	case 1:
		next.X++
	case 2:
		next.Y--
	case 3:
		next.X--
	}

	// Keep the agent inside the grid: lower bound first, then upper bound.
	if next.X < 0 {
		next.X = 0
	}
	if next.X >= ge.Width {
		next.X = ge.Width - 1
	}
	if next.Y < 0 {
		next.Y = 0
	}
	if next.Y >= ge.Height {
		next.Y = ge.Height - 1
	}

	ge.Pos = next
	obs = rlglue.NewObservation([]int32{ge.Pos.X, ge.Pos.Y}, []float64{}, []byte{})
	return obs, r, t
}
Ejemplo n.º 4
0
// EnvStep applies the action selected by the first integer of action to the
// hunter and returns the resulting observation, reward and termination flag.
//
// Action codes: 0 turns left, 1 turns right, 2 moves, 3 shoots. Any other
// code leaves the hunter untouched and yields r == 0 and t == false.
//
// When the step is terminal, the observation built before the action was
// applied is returned; otherwise a fresh observation is built afterwards and
// the hunter and observed state are traced to standard error.
func (this *Env) EnvStep(action rlglue.Action) (obs rlglue.Observation, r float64, t bool) {
	// Snapshot first: this is what is returned if the action ends the episode.
	lastObs := this.MakeObs()

	code := action.Ints()[0]
	// Debug trace of the chosen action. fmt is used rather than the builtin
	// println, which the Go specification does not guarantee to remain in the
	// language; the output (value plus newline on stderr) is unchanged.
	fmt.Fprintln(os.Stderr, code)

	switch code {
	case 0:
		this.hunter, r, t = this.hunter.TurnLeft()
	case 1:
		this.hunter, r, t = this.hunter.TurnRight()
	case 2:
		this.hunter, r, t = this.hunter.Move()
	case 3:
		this.hunter, r, t = this.hunter.Shoot()
	}

	if t {
		return lastObs, r, t
	}

	obs = this.MakeObs()
	fmt.Fprintf(os.Stderr, "Sending back\n%v\n%v\n", this.hunter, this.observed)
	return obs, r, t
}
Ejemplo n.º 5
0
// getActionIndex converts act into a single index by passing its integer
// components to this.task.Act.Ints.Index.
func (this *BFS3Agent) getActionIndex(act rlglue.Action) (index uint64) {
	components := act.Ints()
	index = this.task.Act.Ints.Index(components)
	return index
}
Ejemplo n.º 6
0
// EnvStep applies one process to one can and returns the new observation.
//
// action carries two integers: the index of the can and the process to run
// on it (0 paint, 1 polish, 2 shortcut, 3 finish).
//
// If every can is already Done the step is terminal with reward 0 and no
// process is run. Otherwise the reward is -1, except for the finish process:
// finishing a can that is painted, polished and unscratched marks it Done and
// yields 10, while finishing any other can ends the episode with -100000.
// Selecting a can that is already Done changes nothing.
//
// The outcome of paint, polish and shortcut is chosen by stat.NextChoice from
// the weight list given at each call site (presumably the index of the
// sampled weight; confirm against the stat package).
func (this *Env) EnvStep(action rlglue.Action) (obs rlglue.Observation, r float64, t bool) {
	choice := action.Ints()
	whichCan, process := choice[0], choice[1]

	// The episode is over once no can is left unfinished.
	unfinished := 0
	for idx := range this.Cans {
		if !this.Cans[idx].Done {
			unfinished++
		}
	}
	t = unfinished == 0

	this.Log("%v ", this.Cans)
	if t {
		this.Log("finished\n\n")
		obs = this.makeObs()
		return obs, 0, true
	}

	can := this.Cans[whichCan]
	r = -1

	// Working on a finished can is a wasted step: pay the cost, change nothing.
	if can.Done {
		obs = this.makeObs()
		return obs, r, t
	}

	switch process {
	case 0:
		this.Log("painting can %d\n", whichCan+1)
		switch stat.NextChoice([]float64{.6, .3, .1}) {
		case 0:
			can.Painted = true
		case 1:
			can.Painted, can.Scratched = true, true
		}
	case 1:
		this.Log("polishing can %d\n", whichCan+1)
		switch stat.NextChoice([]float64{.2, .2, .3, .2, .1}) {
		case 0:
			can.Painted = false
		case 1:
			can.Scratched = false
		case 2:
			can.Polished, can.Painted, can.Scratched = true, false, false
		case 3:
			can.Polished, can.Painted = true, false
		}
	case 2:
		this.Log("shortcut can %d\n", whichCan+1)
		if stat.NextChoice([]float64{0.05, 0.95}) == 0 {
			can.Painted, can.Polished = true, true
		}
	case 3:
		this.Log("finishing can %d\n", whichCan+1)
		ready := can.Painted && can.Polished && !can.Scratched && !can.Done
		if !ready {
			t = true
			r = -100000
		} else {
			can.Done = true
			r = 10
		}
	}

	// can is a local copy; store it back so the changes take effect.
	this.Cans[whichCan] = can

	obs = this.makeObs()
	return obs, r, t
}