func auc(actual []float64, posterior []float64) float64 { log := util.GetLogger() r := tied_rank(posterior) num_positive := 0. sum_positive := 0. for i := 0; i < len(actual); i++ { if actual[i] == 1 { num_positive++ } } num_negative := float64(len(actual)) - num_positive for i := 0; i < len(r); i++ { if actual[i] == 1 { sum_positive += r[i] } } if num_negative*num_positive < 0.00001 { log.Info(fmt.Sprintf("num_positive %d, num_negative %d, sum_positive%d\n", num_positive, num_negative, sum_positive)) return 0. } auc := ((sum_positive - num_positive*(num_positive+1)/2.0) / (num_negative * num_positive)) return auc }
func (ft *FtrlTrainer) Initialize(epoch int, cache_feature_num bool) bool { ft.Epoch = epoch ft.CacheFeatureNum = cache_feature_num ft.Init = true ft.log = util.GetLogger() return ft.Init }
func StreamRun(model_file string, instances []string) (string, error) { log := util.GetLogger() if !util.FileExists(model_file) || len(instances) == 0 { log.Error("[Predictor-StreamRun] Model file or instances error.") return fmt.Sprintf(errorjson, "[Predictor-StreamRun] Model file or instances error."), errors.New("[Predictor-StreamRun] Model file or instances error.") } var rtstr string var model solver.LRModel model.Initialize(model_file) for i := 0; i < len(instances); i++ { res, _, x := util.ParseSample(instances[i]) if res != nil { break } pred := model.Predict(x) pred = math.Max(math.Min(pred, 1.-10e-15), 10e-15) if i == len(instances)-1 { rtstr += strconv.FormatFloat(pred, 'f', 6, 64) } else { rtstr += strconv.FormatFloat(pred, 'f', 6, 64) + "," } } return fmt.Sprintf(streamjson, rtstr), nil }
func (lr *LRModel) Initialize(path string) error { lr.log = util.GetLogger() file, err := os.Open(path) if err != nil { lr.log.Error(fmt.Sprintf("[LRModel-Initialize] Lr model initialize error.%s", err.Error())) return errors.New(fmt.Sprintf("[LRModel-Initialize] Lr model initialize error.%s", err.Error())) } defer file.Close() var fls FtrlSolver m, err := ioutil.ReadAll(file) if err != nil { lr.log.Error(fmt.Sprintf("[LRModel-Initialize] Lr model initialize error.%s", err.Error())) return errors.New(fmt.Sprintf("[LRModel-Initialize] Lr model initialize error.%s", err.Error())) } err2 := json.Unmarshal(m, &fls) if err2 != nil { lr.log.Error(fmt.Sprintf("[LRModel-Initialize] Lr model initialize error.%s", err2.Error())) return errors.New(fmt.Sprintf("[LRModel-Initialize] Lr model initialize error.%s", err2.Error())) } lr.Model = make(map[int]float64) for i := 0; i < len(fls.Weights); i++ { lr.Model[fls.Weights[i].Index] = fls.Weights[i].Value } lr.Init = true return nil }
func (fs *FtrlSolver) SaveModel(path string) error { log := util.GetLogger() if !fs.Init { log.Error("[FtrlSolver-SaveModel] Ftrl solver initialize error.") return errors.New("[FtrlSolver-SaveModel] Ftrl solver initialize error.") } file, err := os.Create(path) if err != nil { log.Error(fmt.Sprintf("[FtrlSolver-SaveModel] Ftrl solver save model error.%s", err.Error())) return errors.New(fmt.Sprintf("[FtrlSolver-SaveModel] Ftrl solver save model error.%s", err.Error())) } fs.Weights = make(util.Pvector, fs.Featnum) for i := 0; i < fs.Featnum; i++ { val := util.Round(fs.GetWeight(i), 5) fs.Weights[i] = util.Pair{i, val} } b, err2 := json.Marshal(fs) if err2 != nil { log.Error(fmt.Sprintf("[FtrlSolver-SaveModel] Ftrl solver save model error.%s", err2.Error())) return errors.New(fmt.Sprintf("[FtrlSolver-SaveModel] Ftrl solver save model error.%s", err2.Error())) } _, err = file.Write(b) if err != nil { log.Error(fmt.Sprintf("[FtrlSolver-SaveModel] Ftrl solver save model error.%s", err2.Error())) return errors.New(fmt.Sprintf("[FtrlSolver-SaveModel] Ftrl solver save model error.%s", err2.Error())) } return nil }
func (lan *Lands) Initialize(configFile string) error { var err error lan.mux = make(map[string]func(http.ResponseWriter, *util.ModelParam) error) lan.mux["/goline/online"] = lan.onlineServeHttp lan.mux["/goline/offline"] = lan.offlineServeHttp lan.mux["/goline/predict"] = lan.predictServeHttp file, err := ioutil.ReadFile(configFile) if err != nil { lan.log4goline.Error("[Lands-Initialize]Open Config error." + err.Error()) return errors.New("[Lands-Initialize]Open Config error." + err.Error()) } temp := new(util.Config) if err = json.Unmarshal(file, temp); err != nil { lan.log4goline.Error("[Lands-Initialize]Parse config file error." + err.Error()) return errors.New("[Lands-Initialize]Parse config file error." + err.Error()) } lan.conf = temp lan.pool = util.InitRedisPool(&lan.conf.Redis) if lan.pool == nil { lan.log4goline.Error("[Lands-Initialize]Initialize redis pool error.") return errors.New("[Lands-Initialize]Initialize redis pool error.") } util.InitLogger(lan.conf.LogModule) lan.log4goline = util.GetLogger() return nil }
func (lft *LockFreeFtrlTrainer) Initialize( epoch int, num_threads int, cache_feature_num bool) bool { lft.Epoch = epoch lft.CacheFeatureNum = cache_feature_num lft.NumThreads = num_threads lft.log = util.GetLogger() lft.Init = true return lft.Init }
func (fps *FtrlParamServer) Initialize( alpha float64, beta float64, l1 float64, l2 float64, n int, dropout float64) error { fps.log = util.GetLogger() if !fps.FtrlSolver.Initialize(alpha, beta, l1, l2, n, dropout) { fps.log.Error("[FtrlParamServer-Initialize] Fast ftrl solver initialize error.") return errors.New("[FtrlParamServer-Initialize] Fast ftrl solver initialize error.") } fps.ParamGroupNum = calc_group_num(n) fps.LockSlots = make([]sync.Mutex, fps.ParamGroupNum) fps.Init = true return nil }
func (fs *FtrlSolver) SaveEncodeModel() (string, error) { log := util.GetLogger() if !fs.Init { log.Error("[FtrlSolver-SaveEncodeModel] Ftrl solver initialize error.") return "", errors.New("[FtrlSolver-SaveEncodeModel] Ftrl solver initialize error.") } fs.Weights = make(util.Pvector, fs.Featnum) for i := 0; i < fs.Featnum; i++ { val := util.Round(fs.GetWeight(i), 5) fs.Weights[i] = util.Pair{i, val} } b, err := json.Marshal(fs) if err != nil { log.Error(fmt.Sprintf("[FtrlSolver-SaveEncodeModel] Ftrl solver save model error.%s", err.Error())) return "", errors.New(fmt.Sprintf("[FtrlSolver-SaveEncodeModel] Ftrl solver save model error.%s", err.Error())) } return string(b), nil }
func (fft *FastFtrlTrainer) Initialize( epoch int, num_threads int, cache_feature_num bool, burn_in float64, push_step int, fetch_step int) bool { fft.Epoch = epoch fft.CacheFeatureNum = cache_feature_num fft.PusStep = push_step fft.FetchStep = fetch_step if num_threads == 0 { fft.NumThreads = runtime.NumCPU() } else { fft.NumThreads = num_threads } fft.Init = true fft.BurnIn = burn_in fft.log4fft = util.GetLogger() return fft.Init }
func (fw *FtrlWorker) Initialize( param_server *FtrlParamServer, push_step int, fetch_step int) bool { fw.FtrlSolver.Alpha = param_server.Alpha fw.FtrlSolver.Beta = param_server.Beta fw.FtrlSolver.L1 = param_server.L1 fw.FtrlSolver.L2 = param_server.L2 fw.FtrlSolver.Featnum = param_server.Featnum fw.FtrlSolver.Dropout = param_server.Dropout fw.NUpdate = make([]float64, fw.FtrlSolver.Featnum) fw.ZUpdate = make([]float64, fw.FtrlSolver.Featnum) fw.SetFloatZero(fw.NUpdate, fw.FtrlSolver.Featnum) fw.SetFloatZero(fw.ZUpdate, fw.FtrlSolver.Featnum) fw.N = make([]float64, fw.FtrlSolver.Featnum) fw.Z = make([]float64, fw.FtrlSolver.Featnum) if param_server.FetchParam(fw.N, fw.Z) != nil { return false } fw.ParamGroupNum = calc_group_num(fw.FtrlSolver.Featnum) fw.ParamGroupStep = make([]int, fw.ParamGroupNum) for i := 0; i < fw.ParamGroupNum; i++ { fw.ParamGroupStep[i] = 0 } fw.PushStep = push_step fw.FetchStep = fetch_step fw.log = util.GetLogger() fw.FtrlSolver.Init = true return fw.FtrlSolver.Init }
func print_usage(argc int, argv []string) { log := util.GetLogger() log.Error(fmt.Sprintf("Usage:\n", "")) log.Error(fmt.Sprintf("\t%s job_name test_file model output_file threshold\n", argv[0])) }
func Run(argc int, argv []string) (string, error) { var job_name string var test_file string var model_file string var output_file string var threshold float64 log := util.GetLogger() if len(argv) == 5 { job_name = argv[0] test_file = argv[1] model_file = argv[2] output_file = argv[3] threshold, _ = strconv.ParseFloat(argv[4], 64) } else { print_usage(argc, argv) log.Error("[Predictor-Run] Input parameters error.") return fmt.Sprintf(errorjson, "[Predictor-Run] Input parameters error."), errors.New("[Predictor-Run] Input parameters error.") } if len(job_name) == 0 || len(test_file) == 0 || len(model_file) == 0 || len(output_file) == 0 { print_usage(argc, argv) log.Error("[Predictor-Run] Input parameters error.") return fmt.Sprintf(errorjson, "[Predictor-Run] Input parameters error."), errors.New("[Predictor-Run] Input parameters error.") } var model solver.LRModel model.Initialize(model_file) var wfp *os.File var err1 error exist := func(filename string) bool { var exist = true if _, err := os.Stat(filename); os.IsNotExist(err) { exist = false } return exist } if exist(output_file) { wfp, err1 = os.OpenFile(output_file, os.O_SYNC, 0666) } else { wfp, err1 = os.Create(output_file) } if err1 != nil { log.Error("[Predictor-Run] Open file error." + err1.Error()) return fmt.Sprintf(errorjson, err1.Error()), errors.New("[Predictor-Run] Open file error." + err1.Error()) } defer wfp.Close() cnt := 0 //样本总数 pcorrect := 0 //正样本预测正确数 pcnt := 0 //正样本总数 ncorrect := 0 //负样本预测正确数 var loss float64 = 0. var parser trainer.FileParser err := parser.OpenFile(test_file) if err != nil { log.Error("[Predictor-Run] Open file error." + err.Error()) return fmt.Sprintf(errorjson, err.Error()), errors.New("[Predictor-Run] Open file error." + err.Error()) } var pred_scores util.Dvector for { res, y, x := parser.ReadSample() if res != nil { break } pred := model.Predict(x) pred = math.Max(math.Min(pred, 1.-10e-15), 10e-15) wfp.WriteString(fmt.Sprintf("%f\n", pred)) pred_scores = append(pred_scores, util.DPair{pred, y}) cnt++ if util.UtilFloat64Equal(y, 1.0) { pcnt++ } var pred_label float64 = 0 if pred > threshold { pred_label = 1 } if util.UtilFloat64Equal(pred_label, y) { if util.UtilFloat64Equal(y, 1.0) { pcorrect++ } else { ncorrect++ } } pred = math.Max(math.Min(pred, 1.-10e-15), 10e-15) if y > 0 { loss += -math.Log(pred) } else { loss += -math.Log(1. - pred) } } auc := calc_auc(pred_scores) if auc < 0.5 { auc = 0.5 } if cnt > 0 { log.Info(fmt.Sprintf("[%s] Log-likelihood = %f\n", job_name, float64(loss)/float64(cnt))) log.Info(fmt.Sprintf("[%s] Precision = %.2f%% (%d/%d)\n", job_name, float64(pcorrect*100)/float64(cnt-pcnt-ncorrect+pcorrect), pcorrect, cnt-pcnt-ncorrect+pcorrect)) log.Info(fmt.Sprintf("[%s] Recall = %.2f%% (%d/%d)\n", job_name, float64(pcorrect*100)/float64(pcnt), pcorrect, pcnt)) log.Info(fmt.Sprintf("[%s] Accuracy = %.2f%% (%d/%d)\n", job_name, float64((pcorrect+ncorrect)*100)/float64(cnt), (pcorrect + ncorrect), cnt)) log.Info(fmt.Sprintf("[%s] AUC = %f\n", job_name, auc)) } parser.CloseFile() util.Write2File(output_file, fmt.Sprintf(" Log-likelihood = %f\n Precision = %f (%d/%d)\n Recall = %f (%d/%d)\n Accuracy = %f (%d/%d)\n AUC = %f\n", float64(loss)/float64(cnt), float64(pcorrect)/float64(cnt-pcnt-ncorrect+pcorrect), pcorrect, cnt-pcnt-ncorrect+pcorrect, float64(pcorrect)/float64(pcnt), pcorrect, pcnt, float64(pcorrect+ncorrect)/float64(cnt), pcorrect+ncorrect, cnt, auc)) return fmt.Sprintf(returnJson, job_name, fmt.Sprintf("Log-likelihood = %f", float64(loss)/float64(cnt)), fmt.Sprintf("Precision = %f (%d/%d)", float64(pcorrect)/float64(cnt-pcnt-ncorrect+pcorrect), pcorrect, cnt-pcnt-ncorrect+pcorrect), fmt.Sprintf("Recall = %f (%d/%d)", float64(pcorrect)/float64(pcnt), pcorrect, pcnt), fmt.Sprintf("Accuracy = %f (%d/%d)", float64((pcorrect+ncorrect))/float64(cnt), (pcorrect+ncorrect), cnt), fmt.Sprintf("AUC = %f", auc), output_file), nil }
func read_problem_info( train_file string, read_cache bool, num_threads int) (int, int, error) { feat_num := 0 line_cnt := 0 log := util.GetLogger() var lock sync.Mutex var parser FileParser var errall error read_from_cache := func(path string) error { fs, err := os.Open(path) defer fs.Close() if err != nil { return err } bfRd := bufio.NewReader(fs) line, err := bfRd.ReadString('\n') if err != nil { return err } var res []string = s.Split(line, " ") if len(res) != 2 { log.Error("[read_problem_info] File format error.") return errors.New("[read_problem_info] File format error.") } feat_num, errall = strconv.Atoi(res[0]) if errall != nil { log.Error("[read_problem_info] Label format error." + errall.Error()) return errors.New("[read_problem_info] Label format error." + errall.Error()) } line_cnt, errall = strconv.Atoi(res[1]) if errall != nil { log.Error("[read_problem_info] Feature format error." + errall.Error()) return errors.New("[read_problem_info] Feature format error." + errall.Error()) } return nil } exist := func(filename string) bool { var exist = true if _, err := os.Stat(filename); os.IsNotExist(err) { exist = false } return exist } write_to_cache := func(filename string) error { var f *os.File var err1 error if exist(filename) { f, err1 = os.OpenFile(filename, os.O_WRONLY, 0666) } else { f, err1 = os.Create(filename) } if err1 != nil { return err1 } defer f.Close() wireteString := string(feat_num) + " " + string(line_cnt) + "\n" _, err1 = io.WriteString(f, wireteString) if err1 != nil { return err1 } return nil } read_problem_worker := func(i int, c *sync.WaitGroup) { local_max_feat := 0 local_count := 0 for { flag, _, local_x := parser.ReadSampleMultiThread() if flag != nil { break } for i := 0; i < len(local_x); i++ { if local_x[i].Index+1 > local_max_feat { local_max_feat = local_x[i].Index + 1 } } local_count++ } lock.Lock() line_cnt += local_count lock.Unlock() if local_max_feat > feat_num { feat_num = local_max_feat } defer c.Done() } cache_file := string(train_file) + ".cache" cache_exists := exist(cache_file) if read_cache && cache_exists { read_from_cache(cache_file) } else { parser.OpenFile(train_file) util.UtilParallelRun(read_problem_worker, num_threads) parser.CloseFile() } log.Info(fmt.Sprintf("[read_problem_info] Instances=[%d] features=[%d]\n", line_cnt, feat_num)) if read_cache && !cache_exists { write_to_cache(cache_file) } return feat_num, line_cnt, nil }