func (*Tool) Run(args []string) { fmt.Println("running logistic regression") n := 1000 p := 10 beta := make([]float64, p) beta[0] = rand.NormFloat64() beta[1] = rand.NormFloat64() x := la.NewMatrix(n, p) y := la.NewVector(n) for i := 0; i < n; i++ { v := randVec(p) var z float64 for j := 0; j < p; j++ { x.Set(i, j, v[j]) z += beta[j] } if z > 0 { y.Set(i, +1) } else { y.Set(i, -1) } } rp := &model.RegressionProblem{ N: n, P: p, Data: x, Response: y, ColumnNames: names("p", p), RowNames: names("x", n), ResponseName: "y", } rc := &model.LogisticRegressionRisk{} pc := model.NewLassoPenalty(p) dv := 0.001 vmax := 0.07 mon := &FixedVMonitor{vmax} oa := &model.RandomAssigner{rp.Data.Rows, 2.0 / 3.0} tt := oa.Assign() results := model.RunGpsFull(rp, tt, dv, rc, pc, mon.Continue) fmt.Println(results) }
func Load() (*model.RegressionProblem, *model.ObservationAssignments) { scanner := bufio.NewScanner(strings.NewReader(CancerDataCSV)) var lines [][]string for scanner.Scan() { lines = append(lines, strings.Split(scanner.Text(), "\t")) } if scanner.Err() != nil { panic("can't read data") } var test, train []int var rowNames []string header := lines[0] m := la.NewMatrix(len(lines)-1, 8) for i := 0; i < m.Rows; i++ { for j := 0; j < m.Cols; j++ { if f, err := strconv.ParseFloat(strings.TrimSpace(lines[i+1][j+1]), 64); err == nil { m.Set(i, j, f) } } switch lines[i+1][10] { case "T": train = append(train, i) case "F": test = append(test, i) } rowNames = append(rowNames, lines[i+1][0]) } y := la.NewVector(len(lines) - 1) for i := 0; i < y.Size; i++ { if f, err := strconv.ParseFloat(strings.TrimSpace(lines[i+1][9]), 64); err == nil { y.Set(i, f) } } out := model.RegressionProblem{ N: m.Rows, P: m.Cols, Data: m, Response: y, ColumnNames: header[1 : len(header)-2], RowNames: rowNames, ResponseName: header[9], } tt := model.ObservationAssignments{ TrainingIndicies: train, TestingIndicies: test, } return &out, &tt }