Ejemplo n.º 1
0
func TestDepMatch(t *testing.T) {
	defer nicetrace.Print()
	stat.Seed(seed)
	tstr := "VERSION RL-Glue-3.0 PROBLEMTYPE episodic DISCOUNTFACTOR 1 OBSERVATIONS INTS (0 4) (-1 1) (0 2) ACTIONS INTS (0 1) REWARDS (0 1.0)"
	task, _ := rlglue.ParseTaskSpec(tstr)
	stateRanges := task.Obs.Ints
	actionRanges := task.Act.Ints
	cfg := ConfigDefault()
	cfg.Alpha = alpha
	cfg.M = M
	genDLs := []*DepLearner{NewDepLearner(0, cfg, stateRanges, actionRanges), NewDepLearner(1, cfg, stateRanges, actionRanges), NewDepLearner(2, cfg, stateRanges, actionRanges)}
	genDLs[0].SetParents(ParentSet(0).Insert(0, 1))
	genDLs[1].SetParents(ParentSet(0).Insert(0, 2))
	genDLs[2].SetParents(ParentSet(0).Insert(2))
	patDLs := []*DepLearner{NewDepLearner(0, cfg, stateRanges, actionRanges), NewDepLearner(1, cfg, stateRanges, actionRanges), NewDepLearner(2, cfg, stateRanges, actionRanges)}
	numStates := stateRanges.Count()
	numActions := actionRanges.Count()
	RS := stat.Range(int64(numStates))
	RA := stat.Range(int64(numActions))
	startTime := time.Nanoseconds()
	lastWrongStep := make([]int, len(genDLs))
	for i := 0; i < steps; i++ {
		s := uint64(RS())
		a := uint64(RA())
		nv := make([]int32, len(genDLs))
		for child := 0; child < len(nv); child++ {
			nv[child] = genDLs[child].Next(s, a)
		}
		for child := 0; child < len(nv); child++ {
			genDLs[child] = genDLs[child].Update(s, a, nv[child])
		}
		for child := 0; child < len(nv); child++ {
			patDLs[child] = patDLs[child].Update(s, a, nv[child])
		}
		if i%1 == 0 {
			for child := 0; child < len(nv); child++ {
				patDLs[child].ConsiderRandomFlip()
				if genDLs[child].parents != patDLs[child].parents {
					lastWrongStep[child] = i
				}
			}
		}
	}
	fmt.Println(lastWrongStep)
	endTime := time.Nanoseconds()
	duration := endTime - startTime
	if true {
		fmt.Printf("Ran in %fms\n", float64(duration)/1e6)
	}
	for child := range genDLs {
		if genDLs[child].parents != patDLs[child].parents {
			t.Error(fmt.Sprintf("%d: %v != %v", child, genDLs[child].parents.Slice(), patDLs[child].parents.Slice()))
		}
	}
}
Ejemplo n.º 2
0
func TestBeliefMatch(t *testing.T) {
	defer nicetrace.Print()
	stat.Seed(seed)
	tstr := "VERSION RL-Glue-3.0 PROBLEMTYPE episodic DISCOUNTFACTOR 1 OBSERVATIONS INTS (0 4) (-1 1) (0 2) ACTIONS INTS (0 1) REWARDS (0 1.0)"
	task, _ := rlglue.ParseTaskSpec(tstr)
	cfg := ConfigDefault()
	cfg.Alpha = alpha
	cfg.M = M
	beliefG := NewBelief(cfg, task)
	beliefG.learners[0].SetParents(ParentSet(0).Insert(0, 1))
	beliefG.learners[1].SetParents(ParentSet(0).Insert(0, 2))
	beliefG.learners[2].SetParents(ParentSet(0).Insert(2))
	beliefP := NewBelief(cfg, task)
	numStates := task.Obs.Ints.Count()
	numActions := task.Act.Ints.Count()
	RS := stat.Range(int64(numStates))
	RA := stat.Range(int64(numActions))
	startTime := time.Nanoseconds()
	lastWrongStep := make([]int, len(task.Obs.Ints))
	for i := 0; i < steps; i++ {
		s := uint64(RS())
		a := uint64(RA())
		n := beliefG.Next(s, a)
		beliefG = beliefG.Update(s, a, n).(*Belief)
		beliefP = beliefP.Update(s, a, n).(*Belief)
		if i%1 == 0 {
			beliefP.ConsiderRandomFlipAll()
			for child, learner := range beliefP.learners {
				if beliefG.learners[child].parents != learner.parents {
					lastWrongStep[child] = i
				}
			}
		}
	}
	fmt.Println(lastWrongStep)
	endTime := time.Nanoseconds()
	duration := endTime - startTime
	if true {
		fmt.Printf("Ran in %fms\n", float64(duration)/1e6)
	}
	for child := range beliefP.learners {
		if beliefG.learners[child].parents != beliefP.learners[child].parents {
			t.Error(fmt.Sprintf("%d: %v != %v", child, beliefG.learners[child].parents.Slice(), beliefP.learners[child].parents.Slice()))
		}
	}
}