// getSamples reads the sample dataset from SampleDsvFile. If DoTest is
// false, all samples are returned as the training set with a nil test set.
// Otherwise, NBootstrap percent of the rows are randomly picked (without
// replacement) as the training set and the remaining rows become the test
// set.
func getSamples() (train, test tabula.ClasetInterface) {
	samples := tabula.Claset{}

	_, e := dsv.SimpleRead(SampleDsvFile, &samples)
	if nil != e {
		log.Fatal(e)
	}

	if !DoTest {
		return &samples, nil
	}

	ntrain := int(float32(samples.Len()) * (float32(NBootstrap) / 100.0))

	bag, oob, _, _ := tabula.RandomPickRows(&samples, ntrain, false)

	train = bag.(tabula.ClasetInterface)
	test = oob.(tabula.ClasetInterface)

	train.SetClassIndex(samples.GetClassIndex())
	test.SetClassIndex(samples.GetClassIndex())

	return train, test
}
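// A worked example of the split in getSamples above, with illustrative
// numbers only (the actual sample file size and NBootstrap value may
// differ): with NBootstrap = 66 and a sample file of 150 rows,
//
//	ntrain = int(float32(150) * (float32(66) / 100.0)) = 99
//
// so 99 rows are picked without replacement into the training set and the
// remaining 51 unpicked rows become the test set.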
/*
GrowTree grows a new tree in the forest and returns the OOB error value,
or an error if the tree cannot grow.

Algorithm,

(1) Select random samples with replacement, also collecting the
out-of-bag (OOB) samples.
(2) Build a tree using CART, without pruning.
(3) Add the tree to the forest.
(4) Save the indices of the random samples for calculating the error rate
later.
(5) Run OOB classification on the forest.
(6) Calculate the OOB error rate and statistic values.
*/
func (forest *Runtime) GrowTree(samples tabula.ClasetInterface) (
	cm *classifier.CM, stat *classifier.Stat, e error,
) {
	stat = &classifier.Stat{}
	stat.ID = int64(len(forest.trees))
	stat.Start()

	// (1)
	bag, oob, bagIdx, oobIdx := tabula.RandomPickRows(
		samples.(tabula.DatasetInterface),
		forest.nSubsample, true)

	bagset := bag.(tabula.ClasetInterface)

	if DEBUG >= 2 {
		bagset.RecountMajorMinor()
		fmt.Println(tag, "Bagging:", bagset)
	}

	// (2)
	cart, e := cart.New(bagset, cart.SplitMethodGini,
		forest.NRandomFeature)
	if e != nil {
		return nil, nil, e
	}

	// (3)
	forest.AddCartTree(*cart)

	// (4)
	forest.AddBagIndex(bagIdx)

	// (5)
	if forest.RunOOB {
		oobset := oob.(tabula.ClasetInterface)
		_, cm, _ = forest.ClassifySet(oobset, oobIdx)
		forest.AddOOBCM(cm)
	}

	stat.End()

	if DEBUG >= 3 && forest.RunOOB {
		fmt.Println(tag, "Elapsed time (s):", stat.ElapsedTime)
	}

	forest.AddStat(stat)

	// (6)
	if forest.RunOOB {
		forest.ComputeStatFromCM(stat, cm)

		if DEBUG >= 2 {
			fmt.Println(tag, "OOB stat:", stat)
		}
	}

	forest.ComputeStatTotal(stat)

	e = forest.WriteOOBStat(stat)

	return cm, stat, e
}
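// growForest is a hypothetical sketch, not part of this package, showing
// how GrowTree could be driven in a loop to grow ntree trees. The function
// name and parameters are assumptions for illustration; real code would
// typically drive tree growing from a higher-level build routine, and
// error handling here is reduced to returning the first failure.
func growForest(forest *Runtime, samples tabula.ClasetInterface, ntree int) error {
	for t := 0; t < ntree; t++ {
		// Each call bags the samples, grows one CART tree, and (when
		// RunOOB is enabled) updates the OOB statistics.
		_, stat, e := forest.GrowTree(samples)
		if e != nil {
			return e
		}
		fmt.Println(tag, "tree", t, "stat:", stat)
	}
	return nil
}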