// CovMatrix builds the covariance (kernel) matrix of the samples in X:
// entry (i, j) is cov_func applied to the feature vectors of X[i] and X[j].
// Since the matrix is symmetric, each pair is evaluated once and mirrored.
func CovMatrix(X []*core.RealSample, cov_func CovFunc) *core.Matrix {
	l := int64(len(X))
	ret := core.NewMatrix()
	for i := int64(0); i < l; i++ {
		for j := i; j < l; j++ {
			c := cov_func(X[i].GetFeatureVector(), X[j].GetFeatureVector())
			ret.SetValue(i, j, c)
			ret.SetValue(j, i, c)
		}
	}
	return ret
}
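// Usage sketch (hedged): this assumes CovFunc has the signature
// func(*core.Vector, *core.Vector) float64, as suggested by the call inside
// CovMatrix, and uses a plain linear kernel purely for illustration.
//
//	linearKernel := func(a, b *core.Vector) float64 { return a.Dot(b) }
//	K := CovMatrix(samples, linearKernel)    // samples is []*core.RealSample
//	_ = K.GetValue(0, 1) == K.GetValue(1, 0) // true: K is symmetric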
// Train fits a two-layer neural network (sigmoid hidden layer, softmax output)
// to the dataset with stochastic gradient descent.
func (algo *NeuralNetwork) Train(dataset *core.DataSet) {
	algo.Model = TwoLayerWeights{}
	algo.Model.L1 = core.NewMatrix()
	algo.Model.L2 = core.NewMatrix()

	for i := int64(0); i < algo.Params.Hidden; i++ {
		algo.Model.L1.Data[i] = core.NewVector()
	}

	// Randomly initialize the first-layer weights for every feature that
	// appears in the dataset, and record the largest label seen.
	initialized := make(map[int64]int)
	max_label := 0
	for _, sample := range dataset.Samples {
		if max_label < sample.Label {
			max_label = sample.Label
		}
		for _, f := range sample.Features {
			_, ok := initialized[f.Id]
			if !ok {
				for i := int64(0); i < algo.Params.Hidden; i++ {
					algo.Model.L1.SetValue(i, f.Id, (rand.Float64()-0.5)/math.Sqrt(float64(algo.Params.Hidden)))
				}
				initialized[f.Id] = 1
			}
		}
	}
	algo.MaxLabel = int64(max_label)

	// Initialize the second-layer (hidden + bias -> output) weights.
	for i := int64(0); i <= algo.Params.Hidden; i++ {
		for j := int64(0); j <= algo.MaxLabel; j++ {
			algo.Model.L2.SetValue(i, j, rand.NormFloat64()/math.Sqrt(float64(algo.MaxLabel)+1.0))
		}
	}

	for step := 0; step < algo.Params.Steps; step++ {
		if algo.Params.Verbose <= 0 {
			fmt.Printf(".")
		}
		total := len(dataset.Samples)
		counter := 0
		for _, sample := range dataset.Samples {
			y := core.NewVector()
			z := core.NewVector()
			e := core.NewVector()
			delta_hidden := core.NewVector()

			// Forward pass: hidden activations, then a constant bias unit.
			for i := int64(0); i < algo.Params.Hidden; i++ {
				sum := float64(0)
				wi := algo.Model.L1.Data[i]
				for _, f := range sample.Features {
					sum += f.Value * wi.GetValue(f.Id)
				}
				y.Data[i] = util.Sigmoid(sum)
			}
			y.Data[algo.Params.Hidden] = 1.0

			// Output layer followed by softmax normalization.
			for i := int64(0); i <= algo.MaxLabel; i++ {
				sum := float64(0)
				for j := int64(0); j <= algo.Params.Hidden; j++ {
					sum += y.GetValue(j) * algo.Model.L2.GetValue(j, i)
				}
				z.SetValue(i, sum)
			}
			z = z.SoftMaxNorm()

			// Output error: e = one-hot(label) - z.
			e.SetValue(int64(sample.Label), 1.0)
			e.AddVector(z, -1.0)

			// Backward pass: update L2 weights and accumulate the deltas
			// propagated back to the hidden layer.
			for i := int64(0); i <= algo.Params.Hidden; i++ {
				delta := float64(0)
				for j := int64(0); j <= algo.MaxLabel; j++ {
					wij := algo.Model.L2.GetValue(i, j)
					sig_ij := e.GetValue(j) * (1 - z.GetValue(j)) * z.GetValue(j)
					delta += sig_ij * wij
					wij += algo.Params.LearningRate * (y.GetValue(i)*sig_ij - algo.Params.Regularization*wij)
					algo.Model.L2.SetValue(i, j, wij)
				}
				delta_hidden.SetValue(i, delta)
			}

			// Update the first-layer weights for the features of this sample.
			for i := int64(0); i < algo.Params.Hidden; i++ {
				wi := algo.Model.L1.Data[i]
				for _, f := range sample.Features {
					wji := wi.GetValue(f.Id)
					wji += algo.Params.LearningRate * (delta_hidden.GetValue(i)*f.Value*y.GetValue(i)*(1-y.GetValue(i)) - algo.Params.Regularization*wji)
					wi.SetValue(f.Id, wji)
				}
			}

			counter++
			if algo.Params.Verbose > 0 && counter%2000 == 0 {
				fmt.Printf("Epoch %d %f%%\n", step+1, float64(counter)/float64(total)*100)
			}
		}
		if algo.Params.Verbose > 0 {
			algo.Evaluate(dataset)
		}
		algo.Params.LearningRate *= algo.Params.LearningRateDiscount
	}
	fmt.Println()
}
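// Training sketch (hedged): the parameter fields below are exactly those read
// by Train, but the zero-value construction of NeuralNetwork and the origin of
// dataset are assumptions about the surrounding package.
//
//	algo := &NeuralNetwork{}
//	algo.Params.Hidden = 10                // number of hidden units
//	algo.Params.Steps = 20                 // training epochs
//	algo.Params.LearningRate = 0.1
//	algo.Params.LearningRateDiscount = 0.9 // per-epoch decay
//	algo.Params.Regularization = 1e-4
//	algo.Params.Verbose = 1
//	algo.Train(dataset) // dataset is a *core.DataSet with labels in [0, MaxLabel]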
/*
ApproximateInversion computes inv(C)*u, where C = A + I*theta, without forming
the inverse explicitly. Based on Gibbs and MacKay 1997, and Mark N. Gibbs's
PhD dissertation.

Arguments:
	A     - a positive semi-definite matrix (e.g. a covariance matrix)
	u     - a vector
	theta - a positive number (diagonal noise term)
	dim   - the dimension of A and u

Returns an approximation of inv(C)*u. The diagonal noise term is what makes C
well conditioned, but because the iteration is numerically stable, theta can be
very small and the inversion can still be computed.
*/
func (algo *GaussianProcess) ApproximateInversion(A *core.Matrix, u *core.Vector, theta float64, dim int64) *core.Vector {
	max_itr := 500
	tol := 0.01

	C := core.NewMatrix()
	for key, val := range A.Data {
		C.Data[key] = val.Copy()
	}

	// Add theta to the diagonal elements: C = A + I*theta.
	for i := int64(0); i < dim; i++ {
		_, ok := C.Data[i]
		if !ok {
			C.Data[i] = core.NewVector()
		}
		C.Data[i].Data[i] = C.Data[i].Data[i] + theta
	}

	var Q_l float64
	var Q_u float64
	var dQ float64
	u_norm := u.Dot(u) / 2

	// Lower bound
	y_l := core.NewVector()
	g_l := u.Copy()
	h_l := u.Copy()
	lambda_l := float64(0)
	gamma_l := float64(0)
	var tmp_f1 float64
	var tmp_f2 float64
	var tmp_v1 *core.Vector
	tmp_f1 = g_l.Dot(g_l)
	tmp_v1 = C.MultiplyVector(h_l)

	// Upper bound
	y_u := core.NewVector()
	g_u := u.Copy()
	h_u := u.Copy()
	lambda_u := float64(0)
	gamma_u := float64(0)
	var tmp_f3 float64
	var tmp_f4 float64
	var tmp_v3 *core.Vector
	var tmp_v4 *core.Vector
	tmp_v3 = g_u.MultiplyMatrix(A)
	tmp_v4 = C.MultiplyVector(h_u)
	tmp_f3 = tmp_v1.Dot(g_u)

	for i := 0; i < max_itr; i++ {
		// Lower bound
		lambda_l = tmp_f1 / h_l.Dot(tmp_v1)
		y_l.AddVector(h_l, lambda_l) // y_l next
		Q_l = y_l.Dot(u) - 0.5*(y_l.MultiplyMatrix(C)).Dot(y_l)

		// Upper bound
		lambda_u = tmp_f3 / tmp_v3.Dot(tmp_v4)
		y_u.AddVector(h_u, lambda_u) // y_u next
		Q_u = (y_u.MultiplyMatrix(A)).Dot(u) - 0.5*((y_u.MultiplyMatrix(C)).MultiplyMatrix(A)).Dot(y_u)

		// Stop once the gap between the two bounds is small enough.
		dQ = (u_norm-Q_u)/theta - Q_l
		if dQ < tol {
			break
		}

		// Lower bound variable updates
		g_l.AddVector(tmp_v1, -lambda_l) // g_l next
		tmp_f2 = g_l.Dot(g_l)
		gamma_l = tmp_f2 / tmp_f1
		for key, val := range h_l.Data {
			h_l.SetValue(key, val*gamma_l)
		}
		h_l.AddVector(g_l, 1)          // h_l next
		tmp_f1 = tmp_f2                // tmp_f1 next
		tmp_v1 = C.MultiplyVector(h_l) // tmp_v1 next

		// Upper bound variable updates
		g_u.AddVector(tmp_v4, -lambda_u) // g_u next
		tmp_v3 = g_u.MultiplyMatrix(A)   // tmp_v3 next
		tmp_f4 = tmp_v3.Dot(g_u)
		gamma_u = tmp_f4 / tmp_f3
		for key, val := range h_u.Data {
			h_u.SetValue(key, val*gamma_u)
		}
		h_u.AddVector(g_u, 1)          // h_u next
		tmp_v4 = C.MultiplyVector(h_u) // tmp_v4 next
		tmp_f3 = tmp_f4                // tmp_f3 next
	}

	return y_l
}
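// Usage sketch (hedged): one way this routine can be combined with CovMatrix
// for Gaussian process prediction. The construction of the target vector y
// (assuming each sample carries a regression target) and the noise level 1e-6
// are illustrative assumptions, not part of this file.
//
//	K := CovMatrix(samples, covFunc) // kernel matrix over training samples
//	y := core.NewVector()
//	for i, s := range samples {      // assumes targets are stored on the samples
//		y.SetValue(int64(i), s.Value)
//	}
//	alpha := algo.ApproximateInversion(K, y, 1e-6, int64(len(samples)))
//	// alpha approximates inv(K + theta*I) * y, the weight vector used in the
//	// GP predictive mean at a test point.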