// Update applies one FTRL-Proximal update for a single sample (x, y) and
// returns the predicted probability for that sample.
func (fs *FtrlSolver) Update(x util.Pvector, y float64) float64 {
	if !fs.Init {
		return 0
	}

	weights := make(util.Pvector, 0, fs.Featnum)
	gradients := make([]float64, 0, fs.Featnum)
	wTx := 0.

	for i := 0; i < len(x); i++ {
		item := x[i]
		// With dropout enabled, randomly skip features.
		if util.UtilGreater(fs.Dropout, 0.0) {
			rand_prob := util.UniformDistribution()
			if rand_prob < fs.Dropout {
				continue
			}
		}

		idx := item.Index
		if idx >= fs.Featnum {
			continue
		}

		// Look up the current weight w_i.
		val := fs.GetWeight(idx)
		// Collect the active weights.
		weights = append(weights, util.Pair{Index: idx, Value: val})
		// Initialize each gradient entry with the raw feature value x_i;
		// it is scaled by (p_t - y_t) below.
		gradients = append(gradients, item.Value)
		// Accumulate the linear term wT*x.
		wTx += val * item.Value
	}

	// Model prediction p_t = sigmoid(wT*x).
	pred := util.Sigmoid(wTx)
	// Residual p_t - y_t, used to form the per-feature gradients.
	grad := pred - y
	// g_i = (p_t - y_t) * x_i
	util.VectorMultiplies(gradients, grad)

	for k := 0; k < len(weights); k++ {
		i := weights[k].Index
		w_i := weights[k].Value
		grad_i := gradients[k]
		sigma := (math.Sqrt(fs.N[i]+grad_i*grad_i) - math.Sqrt(fs.N[i])) / fs.Alpha
		// z_i = z_i + g_i - sigma_i * w_(t,i)
		fs.Z[i] += grad_i - sigma*w_i
		// n_i = n_i + g_i * g_i
		fs.N[i] += grad_i * grad_i
	}

	return pred
}
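The final loop is the per-coordinate FTRL-Proximal accumulator update: for each active feature it computes sigma_i = (sqrt(n_i + g_i^2) - sqrt(n_i)) / alpha and then folds the gradient into the z and n accumulators. Below is a minimal, self-contained sketch of just that step on plain slices, so the arithmetic can be read without the util package; the names ftrlAccumulate, z, n are illustrative only and not part of the package above.

// A hedged, stand-alone sketch of the per-coordinate accumulator update.
package main

import (
	"fmt"
	"math"
)

// ftrlAccumulate applies z_i += g_i - sigma_i*w_i and n_i += g_i*g_i for one
// active feature, where sigma_i = (sqrt(n_i + g_i^2) - sqrt(n_i)) / alpha.
func ftrlAccumulate(z, n []float64, i int, w, g, alpha float64) {
	sigma := (math.Sqrt(n[i]+g*g) - math.Sqrt(n[i])) / alpha
	z[i] += g - sigma*w
	n[i] += g * g
}

func main() {
	z := make([]float64, 3)
	n := make([]float64, 3)
	// One gradient step on feature 1 with w_1 = 0.5, g_1 = 0.2, alpha = 0.1.
	ftrlAccumulate(z, n, 1, 0.5, 0.2, 0.1)
	fmt.Printf("z=%v n=%v\n", z, n)
}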
// Update performs the same FTRL-Proximal update on a worker thread, keeping the
// local accumulators loosely synchronized with the parameter server, and
// returns the predicted probability.
func (fw *FtrlWorker) Update(
	x util.Pvector,
	y float64,
	param_server *FtrlParamServer) float64 {
	if !fw.FtrlSolver.Init {
		return 0.
	}

	weights := make(util.Pvector, 0, fw.FtrlSolver.Featnum)
	gradients := make([]float64, 0, fw.FtrlSolver.Featnum)
	wTx := 0.

	for i := 0; i < len(x); i++ {
		item := x[i]
		// With dropout enabled, randomly skip features.
		if util.UtilGreater(fw.FtrlSolver.Dropout, 0.0) {
			rand_prob := util.UniformDistribution()
			if rand_prob < fw.FtrlSolver.Dropout {
				continue
			}
		}

		idx := item.Index
		if idx >= fw.FtrlSolver.Featnum {
			continue
		}

		// Look up the current weight w_i.
		val := fw.FtrlSolver.GetWeight(idx)
		// Collect the active weights.
		weights = append(weights, util.Pair{Index: idx, Value: val})
		// Initialize each gradient entry with the raw feature value x_i.
		gradients = append(gradients, item.Value)
		// Accumulate the linear term wT*x.
		wTx += val * item.Value
	}

	// Model prediction p_t = sigmoid(wT*x).
	pred := util.Sigmoid(wTx)
	// Residual p_t - y_t, used to form the per-feature gradients.
	grad := pred - y
	// g_i = (p_t - y_t) * x_i
	util.VectorMultiplies(gradients, grad)

	for k := 0; k < len(weights); k++ {
		i := weights[k].Index
		g := i / ParamGroupSize

		// Periodically pull the latest N/Z values for this parameter group.
		if fw.ParamGroupStep[g]%fw.FetchStep == 0 {
			param_server.FetchParamGroup(
				fw.FtrlSolver.N,
				fw.FtrlSolver.Z,
				g)
		}

		w_i := weights[k].Value
		grad_i := gradients[k]
		sigma := (math.Sqrt(fw.FtrlSolver.N[i]+grad_i*grad_i) -
			math.Sqrt(fw.FtrlSolver.N[i])) / fw.FtrlSolver.Alpha

		// Local solver state: z_i += g_i - sigma_i*w_i, n_i += g_i*g_i.
		fw.FtrlSolver.Z[i] += grad_i - sigma*w_i
		fw.FtrlSolver.N[i] += grad_i * grad_i
		// Accumulate the deltas that still have to be pushed to the server.
		fw.ZUpdate[i] += grad_i - sigma*w_i
		fw.NUpdate[i] += grad_i * grad_i

		// Periodically push the accumulated deltas for this parameter group.
		if fw.ParamGroupStep[g]%fw.PushStep == 0 {
			param_server.PushParamGroup(fw.NUpdate, fw.ZUpdate, g)
		}

		fw.ParamGroupStep[g] += 1
	}

	return pred
}
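The worker differs from the single-machine solver only in how it talks to the parameter server: feature indices are bucketed into groups of ParamGroupSize, each group keeps its own step counter, fresh N/Z values are fetched every FetchStep steps, and locally accumulated deltas are pushed every PushStep steps. The sketch below isolates that modulo-counter gating pattern with hypothetical fetch/push callbacks standing in for FtrlParamServer; the names syncGate, touch, groupSize are illustrative and not part of the package above.

// An illustrative sketch of the fetch/push gating, under the stated assumptions.
package main

import "fmt"

const groupSize = 4 // stand-in for ParamGroupSize

type syncGate struct {
	step      []int // per-group step counters, like ParamGroupStep
	fetchStep int
	pushStep  int
}

func (s *syncGate) touch(idx int, fetch, push func(group int)) {
	g := idx / groupSize
	if s.step[g]%s.fetchStep == 0 {
		fetch(g) // pull the latest shared state for this group
	}
	// ... the local z/n update for index idx would happen here ...
	if s.step[g]%s.pushStep == 0 {
		push(g) // publish the locally accumulated deltas for this group
	}
	s.step[g]++
}

func main() {
	gate := &syncGate{step: make([]int, 2), fetchStep: 3, pushStep: 5}
	for idx := 0; idx < 8; idx++ {
		gate.touch(idx,
			func(g int) { fmt.Println("fetch group", g) },
			func(g int) { fmt.Println("push group", g) })
	}
}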
// TrainImpl drives the multi-threaded training loop: an optional single-threaded
// burn-in on the first epoch, then Epoch passes over train_file with NumThreads
// workers, and finally saves the trained model to model_file.
func (fft *FastFtrlTrainer) TrainImpl(
	model_file string,
	train_file string,
	line_cnt int,
	test_file string) error {
	if !fft.Init {
		fft.log4fft.Error("[FastFtrlTrainer-TrainImpl] Fast ftrl trainer restore error.")
		return errors.New("[FastFtrlTrainer-TrainImpl] Fast ftrl trainer restore error.")
	}

	fft.log4fft.Info(fmt.Sprintf(
		"[%s] params={alpha:%.2f, beta:%.2f, l1:%.2f, l2:%.2f, dropout:%.2f, epoch:%d}\n",
		fft.JobName,
		fft.ParamServer.Alpha,
		fft.ParamServer.Beta,
		fft.ParamServer.L1,
		fft.ParamServer.L2,
		fft.ParamServer.Dropout,
		fft.Epoch))

	// One worker-side solver per thread, all backed by the same parameter server.
	solvers := make([]solver.FtrlWorker, fft.NumThreads)
	for i := 0; i < fft.NumThreads; i++ {
		solvers[i].Initialize(&fft.ParamServer, fft.PusStep, fft.FetchStep)
	}

	predict_func := func(x util.Pvector) float64 {
		return fft.ParamServer.Predict(x)
	}

	var timer util.StopWatch
	timer.StartTimer()
	for iter := 0; iter < fft.Epoch; iter++ {
		var file_parser ParallelFileParser
		file_parser.OpenFile(train_file, fft.NumThreads)

		count := 0
		var loss float64 = 0.
		var lock sync.Mutex

		worker_func := func(i int, c *sync.WaitGroup) {
			defer c.Done()
			local_count := 0
			var local_loss float64 = 0
			for {
				flag, y, x := file_parser.ReadSampleMultiThread(i)
				if flag != nil {
					break
				}
				pred := solvers[i].Update(x, y, &fft.ParamServer)
				local_loss += calc_loss(y, pred)
				local_count++

				if i == 0 && local_count%10000 == 0 {
					tmp_cnt := math.Min(float64(local_count*fft.NumThreads), float64(line_cnt))
					fft.log4fft.Info(fmt.Sprintf("[%s] epoch=%d processed=[%.2f%%] time=[%.2f] train-loss=[%.6f]\n",
						fft.JobName,
						iter,
						tmp_cnt*100/float64(line_cnt),
						timer.StopTimer(),
						local_loss/float64(local_count)))
				}
			}

			lock.Lock()
			count += local_count
			loss += local_loss
			lock.Unlock()

			// Push any deltas still buffered locally before the worker exits.
			solvers[i].PushParam(&fft.ParamServer)
		}

		// Optional burn-in: thread 0 warms up the parameter server on a
		// fraction of the data before the parallel workers start.
		if iter == 0 && util.UtilGreater(fft.BurnIn, float64(0)) {
			burn_in_cnt := int(fft.BurnIn * float64(line_cnt))
			var local_loss float64 = 0
			for i := 0; i < burn_in_cnt; i++ {
				flag, y, x := file_parser.ReadSample(0)
				if flag != nil {
					break
				}
				pred := fft.ParamServer.Update(x, y)
				local_loss += calc_loss(y, pred)

				if i%10000 == 0 {
					fft.log4fft.Info(fmt.Sprintf("[%s] burn-in processed=[%.2f%%] time=[%.2f] train-loss=[%.6f]\n",
						fft.JobName,
						float64((i+1)*100)/float64(line_cnt),
						timer.StopTimer(),
						local_loss/float64(i+1)))
				}
			}
			fft.log4fft.Info(fmt.Sprintf("[%s] burn-in processed=[%.2f%%] time=[%.2f] train-loss=[%.6f]\n",
				fft.JobName,
				float64(burn_in_cnt*100)/float64(line_cnt),
				timer.StopTimer(),
				local_loss/float64(burn_in_cnt)))
			if util.UtilFloat64Equal(fft.BurnIn, float64(1)) {
				continue
			}
		}

		// Re-sync every worker with the parameter server before the epoch starts.
		for i := 0; i < fft.NumThreads; i++ {
			solvers[i].Reset(&fft.ParamServer)
		}
		util.UtilParallelRun(worker_func, fft.NumThreads)

		file_parser.CloseFile(fft.NumThreads)

		// f(w,
		// 	"[%s] epoch=%d processed=[%.2f%%] time=[%.2f] train-loss=[%.6f]\n",
		// 	fft.JobName,
		// 	iter,
		// 	float64(count*100)/float64(line_cnt),
		// 	timer.StopTimer(),
		// 	float64(loss)/float64(count))

		if test_file != "" {
			eval_loss := evaluate_file(test_file, predict_func, fft.NumThreads)
			fft.log4fft.Info(fmt.Sprintf("[%s] validation-loss=[%f]\n",
				fft.JobName,
				eval_loss))
		}
	}

	return fft.ParamServer.SaveModel(model_file)
}
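Each epoch fans the work out to NumThreads goroutines via util.UtilParallelRun and waits for all of them before closing the file and evaluating. The helper itself is not shown in this section; the sketch below is a minimal fan-out/fan-in pattern it is assumed to follow, i.e. one goroutine per worker index sharing a sync.WaitGroup. The name parallelRun is illustrative, not the actual util API.

// A hedged sketch of the assumed fan-out/fan-in behind util.UtilParallelRun.
package main

import (
	"fmt"
	"sync"
)

// parallelRun starts numThreads goroutines, hands each its index and the shared
// WaitGroup, and blocks until every worker has called Done.
func parallelRun(worker func(i int, wg *sync.WaitGroup), numThreads int) {
	var wg sync.WaitGroup
	wg.Add(numThreads)
	for i := 0; i < numThreads; i++ {
		go worker(i, &wg)
	}
	wg.Wait()
}

func main() {
	parallelRun(func(i int, wg *sync.WaitGroup) {
		defer wg.Done()
		fmt.Println("worker", i, "processing its shard of the training file")
	}, 4)
}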