func (this *DepLearner) Consider(np ParentSet) { if np == this.parents { return } if stat.NextBernoulli(1/(1+float64(this.consistency))) == 0 { return } alternateRanges := np.CutRanges(this.bg.ranges) alternateHistory := this.MakeMappedHistory(np, alternateRanges) alternateLoglihood := this.MappedLoglihoodRatio(alternateHistory) choiceLL := []float64{this.mappedLoglihood, alternateLoglihood} sizeDiff := float64(this.parents.Size(uint32(len(this.bg.ranges))) - np.Size(uint32(len(this.bg.ranges)))) if sizeDiff > 0 { choiceLL[0] += sizeDiff * this.bg.cfg.Kappa choiceLL[1] += sizeDiff * (1 - this.bg.cfg.Kappa) } else { choiceLL[0] -= sizeDiff * (1 - this.bg.cfg.Kappa) choiceLL[1] -= sizeDiff * this.bg.cfg.Kappa } if stat.NextLogChoice(choiceLL) == 1 { this.parents = np this.cutRanges = alternateRanges this.mappedHistory = alternateHistory this.mappedLoglihood = alternateLoglihood this.consistency = 0 } else { this.consistency++ } }
func (this *Environment) EnvInit() (taskString string) { fstr := "VERSION RL-Glue-3.0 PROBLEMTYPE episodic DISCOUNTFACTOR %f OBSERVATIONS INTS (%d 0 1) ACTIONS INTS (0 %d) REWARDS (-1.0 1.0)" taskString = fmt.Sprintf(fstr, this.cfg.DiscountFactor, this.cfg.NumSystems, this.cfg.NumSystems) this.task, _ = rlglue.ParseTaskSpec(taskString) this.status = make([]bool, this.cfg.NumSystems) for i := range this.status { this.status[i] = stat.NextBernoulli(this.cfg.StartBoot) == 1 } return }