示例#1
0
// auc computes the area under the ROC curve from the ranks of the predicted
// scores, using the rank-sum formulation
//
//	(sum of ranks of positives - P*(P+1)/2) / (P*N)
//
// where P and N are the numbers of positive and negative samples.
//
// actual holds the labels (a sample counts as positive when its label is
// exactly 1) and posterior holds the predicted scores for the same samples.
// Ranks come from tied_rank(posterior); presumably that helper averages the
// ranks of tied scores and returns one rank per score — confirm against its
// definition.
//
// When there are no positives or no negatives the AUC is undefined; the
// counts are logged and 0 is returned.
func auc(actual []float64, posterior []float64) float64 {
	log := util.GetLogger()
	r := tied_rank(posterior)

	num_positive := 0.
	for _, label := range actual {
		if label == 1 {
			num_positive++
		}
	}
	num_negative := float64(len(actual)) - num_positive

	sum_positive := 0.
	for i := range r {
		if actual[i] == 1 {
			sum_positive += r[i]
		}
	}

	if num_negative*num_positive < 0.00001 {
		// All three values are float64, so they must be printed with %f;
		// %d would render them as "%!d(float64=...)".
		log.Info(fmt.Sprintf("num_positive %f, num_negative %f, sum_positive %f\n", num_positive, num_negative, sum_positive))
		return 0.
	}

	return (sum_positive - num_positive*(num_positive+1)/2.0) /
		(num_negative * num_positive)
}
示例#2
0
// Initialize stores epoch and cache_feature_num on the trainer, attaches the
// logger returned by util.GetLogger and marks the trainer as initialized.
// It always returns true.
func (ft *FtrlTrainer) Initialize(epoch int, cache_feature_num bool) bool {
	ft.log = util.GetLogger()
	ft.Epoch, ft.CacheFeatureNum = epoch, cache_feature_num
	ft.Init = true
	return true
}
示例#3
0
// StreamRun loads the LR model stored in model_file and scores every sample
// in instances.
//
// The result is streamjson formatted with the comma-separated predictions,
// each clamped to [10e-15, 1-10e-15] and printed with six decimals. Scoring
// stops at the first sample that util.ParseSample rejects; predictions made
// before that point are still returned.
//
// An error (plus an errorjson payload) is returned when the model file does
// not exist, when instances is empty, or when the model cannot be loaded.
func StreamRun(model_file string, instances []string) (string, error) {
	log := util.GetLogger()
	if !util.FileExists(model_file) || len(instances) == 0 {
		log.Error("[Predictor-StreamRun] Model file or instances error.")
		return fmt.Sprintf(errorjson, "[Predictor-StreamRun] Model file or instances error."), errors.New("[Predictor-StreamRun] Model file or instances error.")
	}

	var model solver.LRModel
	// A model that failed to load must not be used for prediction.
	if err := model.Initialize(model_file); err != nil {
		msg := "[Predictor-StreamRun] Model initialize error." + err.Error()
		log.Error(msg)
		return fmt.Sprintf(errorjson, msg), errors.New(msg)
	}

	// Build the list in one buffer instead of repeated string concatenation.
	// The separator is written before every element except the first, so an
	// early break can never leave a dangling trailing comma.
	buf := make([]byte, 0, 9*len(instances))
	for i := 0; i < len(instances); i++ {
		res, _, x := util.ParseSample(instances[i])
		if res != nil {
			break
		}

		pred := model.Predict(x)
		pred = math.Max(math.Min(pred, 1.-10e-15), 10e-15)
		if len(buf) > 0 {
			buf = append(buf, ',')
		}
		buf = strconv.AppendFloat(buf, pred, 'f', 6, 64)
	}

	return fmt.Sprintf(streamjson, string(buf)), nil
}
示例#4
0
// Initialize loads a model from the JSON file at path.
//
// The file is decoded into an FtrlSolver and its Weights are copied into
// lr.Model, keyed by each weight's Index. On success lr.Init is set to true
// and nil is returned. Any failure to open, read or decode the file is
// logged and returned as an error with the same message.
func (lr *LRModel) Initialize(path string) error {
	lr.log = util.GetLogger()

	// fail logs the cause and returns an error carrying the identical text.
	fail := func(cause error) error {
		msg := fmt.Sprintf("[LRModel-Initialize] Lr model initialize error.%s", cause.Error())
		lr.log.Error(msg)
		return errors.New(msg)
	}

	file, err := os.Open(path)
	if err != nil {
		return fail(err)
	}
	defer file.Close()

	raw, err := ioutil.ReadAll(file)
	if err != nil {
		return fail(err)
	}

	var fls FtrlSolver
	if err := json.Unmarshal(raw, &fls); err != nil {
		return fail(err)
	}

	lr.Model = make(map[int]float64)
	for _, w := range fls.Weights {
		lr.Model[w.Index] = w.Value
	}
	lr.Init = true

	return nil
}
示例#5
0
// SaveModel serializes the solver as JSON into the file at path, creating or
// truncating it.
//
// Before encoding, fs.Weights is rebuilt with one entry per feature holding
// the feature index and its weight rounded with util.Round(..., 5).
//
// An error is logged and returned when the solver has not been initialized,
// or when the file cannot be created, the solver cannot be encoded, or the
// data cannot be written.
func (fs *FtrlSolver) SaveModel(path string) error {
	log := util.GetLogger()
	if !fs.Init {
		log.Error("[FtrlSolver-SaveModel] Ftrl solver initialize error.")
		return errors.New("[FtrlSolver-SaveModel] Ftrl solver initialize error.")
	}

	file, err := os.Create(path)
	if err != nil {
		log.Error(fmt.Sprintf("[FtrlSolver-SaveModel] Ftrl solver save model error.%s", err.Error()))
		return errors.New(fmt.Sprintf("[FtrlSolver-SaveModel] Ftrl solver save model error.%s", err.Error()))
	}
	// Release the descriptor on every return path.
	defer file.Close()

	fs.Weights = make(util.Pvector, fs.Featnum)
	for i := 0; i < fs.Featnum; i++ {
		val := util.Round(fs.GetWeight(i), 5)
		fs.Weights[i] = util.Pair{i, val}
	}

	b, err := json.Marshal(fs)
	if err != nil {
		log.Error(fmt.Sprintf("[FtrlSolver-SaveModel] Ftrl solver save model error.%s", err.Error()))
		return errors.New(fmt.Sprintf("[FtrlSolver-SaveModel] Ftrl solver save model error.%s", err.Error()))
	}

	// Report the write error itself; the marshal error is nil at this point,
	// so reading its message here would panic.
	if _, err = file.Write(b); err != nil {
		log.Error(fmt.Sprintf("[FtrlSolver-SaveModel] Ftrl solver save model error.%s", err.Error()))
		return errors.New(fmt.Sprintf("[FtrlSolver-SaveModel] Ftrl solver save model error.%s", err.Error()))
	}

	return nil
}
示例#6
0
// Initialize prepares the server from the JSON configuration at configFile.
//
// It registers the three /goline/* handlers, loads and decodes the
// configuration into lan.conf, sets up the logger from conf.LogModule and
// finally creates the redis pool from conf.Redis. It returns an error when
// the configuration cannot be read or decoded, or when the redis pool cannot
// be created.
func (lan *Lands) Initialize(configFile string) error {
	lan.mux = make(map[string]func(http.ResponseWriter, *util.ModelParam) error)
	lan.mux["/goline/online"] = lan.onlineServeHttp
	lan.mux["/goline/offline"] = lan.offlineServeHttp
	lan.mux["/goline/predict"] = lan.predictServeHttp

	// NOTE(review): the logger is configured from the config file, so on the
	// two error paths below lan.log4goline holds whatever the caller set
	// before Initialize (possibly nothing) — confirm that logging through an
	// unset logger is safe for its type.
	file, err := ioutil.ReadFile(configFile)
	if err != nil {
		lan.log4goline.Error("[Lands-Initialize]Open Config error." + err.Error())
		return errors.New("[Lands-Initialize]Open Config error." + err.Error())
	}

	temp := new(util.Config)
	if err = json.Unmarshal(file, temp); err != nil {
		lan.log4goline.Error("[Lands-Initialize]Parse config file error." + err.Error())
		return errors.New("[Lands-Initialize]Parse config file error." + err.Error())
	}

	lan.conf = temp

	// Set up logging as soon as the configuration is available so that a
	// redis failure is reported through an initialized logger.
	util.InitLogger(lan.conf.LogModule)
	lan.log4goline = util.GetLogger()

	lan.pool = util.InitRedisPool(&lan.conf.Redis)
	if lan.pool == nil {
		lan.log4goline.Error("[Lands-Initialize]Initialize redis pool error.")
		return errors.New("[Lands-Initialize]Initialize redis pool error.")
	}

	return nil
}
示例#7
0
// Initialize stores epoch, num_threads and cache_feature_num on the trainer,
// attaches the logger returned by util.GetLogger and marks the trainer as
// initialized. It always returns true.
func (lft *LockFreeFtrlTrainer) Initialize(
	epoch int,
	num_threads int,
	cache_feature_num bool) bool {
	lft.log = util.GetLogger()
	lft.Epoch, lft.NumThreads, lft.CacheFeatureNum = epoch, num_threads, cache_feature_num
	lft.Init = true
	return true
}
示例#8
0
// Initialize sets up the embedded FtrlSolver with the given hyper-parameters
// and allocates one mutex per parameter group, where the number of groups is
// calc_group_num(n).
//
// It returns an error (after logging it) when the embedded solver reports
// that initialization failed; otherwise it marks the server as initialized
// and returns nil.
func (fps *FtrlParamServer) Initialize(
	alpha float64,
	beta float64,
	l1 float64,
	l2 float64,
	n int,
	dropout float64) error {

	fps.log = util.GetLogger()

	if ok := fps.FtrlSolver.Initialize(alpha, beta, l1, l2, n, dropout); !ok {
		const msg = "[FtrlParamServer-Initialize] Fast ftrl solver initialize error."
		fps.log.Error(msg)
		return errors.New(msg)
	}

	fps.ParamGroupNum = calc_group_num(n)
	fps.LockSlots = make([]sync.Mutex, fps.ParamGroupNum)
	fps.Init = true

	return nil
}
示例#9
0
// SaveEncodeModel returns the solver encoded as a JSON string.
//
// Before encoding, fs.Weights is rebuilt with one entry per feature holding
// the feature index and its weight rounded with util.Round(..., 5). An error
// is logged and returned when the solver has not been initialized or when
// encoding fails.
func (fs *FtrlSolver) SaveEncodeModel() (string, error) {
	log := util.GetLogger()

	if !fs.Init {
		const msg = "[FtrlSolver-SaveEncodeModel] Ftrl solver initialize error."
		log.Error(msg)
		return "", errors.New(msg)
	}

	fs.Weights = make(util.Pvector, fs.Featnum)
	for i := range fs.Weights {
		fs.Weights[i] = util.Pair{i, util.Round(fs.GetWeight(i), 5)}
	}

	encoded, err := json.Marshal(fs)
	if err != nil {
		msg := fmt.Sprintf("[FtrlSolver-SaveEncodeModel] Ftrl solver save model error.%s", err.Error())
		log.Error(msg)
		return "", errors.New(msg)
	}

	return string(encoded), nil
}
示例#10
0
// Initialize stores the training settings on the trainer, attaches the
// logger returned by util.GetLogger and marks the trainer as initialized.
//
// A num_threads of 0 selects runtime.NumCPU() threads; any other value is
// used as given. It always returns true.
func (fft *FastFtrlTrainer) Initialize(
	epoch int,
	num_threads int,
	cache_feature_num bool,
	burn_in float64,
	push_step int,
	fetch_step int) bool {
	threads := num_threads
	if threads == 0 {
		threads = runtime.NumCPU()
	}

	fft.log4fft = util.GetLogger()
	fft.Epoch = epoch
	fft.CacheFeatureNum = cache_feature_num
	fft.BurnIn = burn_in
	fft.PusStep = push_step
	fft.FetchStep = fetch_step
	fft.NumThreads = threads
	fft.Init = true

	return true
}
示例#11
0
// Initialize configures the worker from param_server.
//
// It copies the solver hyper-parameters (Alpha, Beta, L1, L2, Featnum,
// Dropout) from the server, allocates the per-feature update buffers and the
// N/Z buffers, fills N and Z through param_server.FetchParam, and resets the
// per-group step counters. It returns false when FetchParam reports a
// failure and true otherwise.
func (fw *FtrlWorker) Initialize(
	param_server *FtrlParamServer,
	push_step int,
	fetch_step int) bool {

	solver := &fw.FtrlSolver
	solver.Alpha = param_server.Alpha
	solver.Beta = param_server.Beta
	solver.L1 = param_server.L1
	solver.L2 = param_server.L2
	solver.Featnum = param_server.Featnum
	solver.Dropout = param_server.Dropout

	// Local accumulators for updates that have not been pushed yet.
	fw.NUpdate = make([]float64, solver.Featnum)
	fw.ZUpdate = make([]float64, solver.Featnum)
	fw.SetFloatZero(fw.NUpdate, solver.Featnum)
	fw.SetFloatZero(fw.ZUpdate, solver.Featnum)

	// Local copies of the server parameters.
	fw.N = make([]float64, solver.Featnum)
	fw.Z = make([]float64, solver.Featnum)
	if err := param_server.FetchParam(fw.N, fw.Z); err != nil {
		return false
	}

	fw.ParamGroupNum = calc_group_num(solver.Featnum)
	fw.ParamGroupStep = make([]int, fw.ParamGroupNum)
	for g := range fw.ParamGroupStep {
		fw.ParamGroupStep[g] = 0
	}

	fw.PushStep, fw.FetchStep = push_step, fetch_step
	fw.log = util.GetLogger()

	solver.Init = true
	return solver.Init
}
示例#12
0
// print_usage logs the expected command-line arguments at error level.
//
// argv[0] is printed as the command name. When argv is empty the placeholder
// "predictor" is used instead, so the function is safe to call while
// reporting a missing-arguments error. argc is not used.
func print_usage(argc int, argv []string) {
	log := util.GetLogger()

	prog := "predictor"
	if len(argv) > 0 {
		prog = argv[0]
	}

	// The header has no format verbs, so it is passed as a plain string;
	// giving Sprintf an extra argument would append "%!(EXTRA string=)".
	log.Error("Usage:\n")
	log.Error(fmt.Sprintf("\t%s job_name test_file model output_file threshold\n", prog))
}
示例#13
0
// Run evaluates a saved LR model on a labelled test file.
//
// argv must hold exactly five entries: job_name, test_file, model_file,
// output_file and threshold (parsed as a float64). argc is only forwarded to
// print_usage.
//
// For every sample read from test_file the clamped prediction is written as
// one line to output_file, which is created or truncated first. After the
// last sample the log-likelihood, precision, recall, accuracy and AUC are
// logged (when at least one sample was read), handed to util.Write2File for
// output_file, and returned formatted with returnJson.
//
// On failure an errorjson payload and a non-nil error are returned. This
// happens for a wrong number of arguments, an empty or unparsable argument,
// a model that cannot be loaded, or a file that cannot be opened or written.
func Run(argc int, argv []string) (string, error) {
	log := util.GetLogger()

	// badParams reports an argument problem in the shared way.
	badParams := func() (string, error) {
		print_usage(argc, argv)
		log.Error("[Predictor-Run] Input parameters error.")
		return fmt.Sprintf(errorjson, "[Predictor-Run] Input parameters error."), errors.New("[Predictor-Run] Input parameters error.")
	}

	if len(argv) != 5 {
		return badParams()
	}
	job_name, test_file, model_file, output_file := argv[0], argv[1], argv[2], argv[3]
	if len(job_name) == 0 || len(test_file) == 0 || len(model_file) == 0 || len(output_file) == 0 {
		return badParams()
	}
	// An unparsable threshold must not silently fall back to 0.
	threshold, err := strconv.ParseFloat(argv[4], 64)
	if err != nil {
		return badParams()
	}

	var model solver.LRModel
	// Do not evaluate with a model that failed to load.
	if err := model.Initialize(model_file); err != nil {
		log.Error("[Predictor-Run] Model initialize error." + err.Error())
		return fmt.Sprintf(errorjson, err.Error()), errors.New("[Predictor-Run] Model initialize error." + err.Error())
	}

	// os.Create opens the file writable whether or not it already exists.
	// Opening an existing file with only os.O_SYNC yields a read-only
	// descriptor, on which every prediction write fails.
	wfp, err := os.Create(output_file)
	if err != nil {
		log.Error("[Predictor-Run] Open file error." + err.Error())
		return fmt.Sprintf(errorjson, err.Error()), errors.New("[Predictor-Run] Open file error." + err.Error())
	}
	defer wfp.Close()

	var parser trainer.FileParser
	if err := parser.OpenFile(test_file); err != nil {
		log.Error("[Predictor-Run] Open file error." + err.Error())
		return fmt.Sprintf(errorjson, err.Error()), errors.New("[Predictor-Run] Open file error." + err.Error())
	}
	defer parser.CloseFile()

	cnt := 0      // total number of samples
	pcorrect := 0 // positive samples predicted correctly
	pcnt := 0     // total number of positive samples
	ncorrect := 0 // negative samples predicted correctly
	var loss float64
	var pred_scores util.Dvector

	for {
		res, y, x := parser.ReadSample()
		if res != nil {
			break
		}

		// Clamp away from 0 and 1 so both logarithms below stay finite.
		pred := model.Predict(x)
		pred = math.Max(math.Min(pred, 1.-10e-15), 10e-15)
		if _, werr := wfp.WriteString(fmt.Sprintf("%f\n", pred)); werr != nil {
			log.Error("[Predictor-Run] Write file error." + werr.Error())
			return fmt.Sprintf(errorjson, werr.Error()), errors.New("[Predictor-Run] Write file error." + werr.Error())
		}

		pred_scores = append(pred_scores, util.DPair{pred, y})

		cnt++
		is_positive := util.UtilFloat64Equal(y, 1.0)
		if is_positive {
			pcnt++
		}

		var pred_label float64 = 0
		if pred > threshold {
			pred_label = 1
		}

		if util.UtilFloat64Equal(pred_label, y) {
			if is_positive {
				pcorrect++
			} else {
				ncorrect++
			}
		}

		if y > 0 {
			loss += -math.Log(pred)
		} else {
			loss += -math.Log(1. - pred)
		}
	}

	auc := calc_auc(pred_scores)
	if auc < 0.5 {
		auc = 0.5
	}

	// Samples predicted positive: false positives (negatives that were not
	// predicted correctly) plus true positives.
	predicted_pos := cnt - pcnt - ncorrect + pcorrect
	correct := pcorrect + ncorrect
	avg_loss := loss / float64(cnt)

	if cnt > 0 {
		log.Info(fmt.Sprintf("[%s] Log-likelihood = %f\n", job_name, avg_loss))
		log.Info(fmt.Sprintf("[%s] Precision = %.2f%% (%d/%d)\n", job_name,
			float64(pcorrect*100)/float64(predicted_pos),
			pcorrect, predicted_pos))
		log.Info(fmt.Sprintf("[%s] Recall = %.2f%% (%d/%d)\n", job_name,
			float64(pcorrect*100)/float64(pcnt), pcorrect, pcnt))
		log.Info(fmt.Sprintf("[%s] Accuracy = %.2f%% (%d/%d)\n", job_name,
			float64(correct*100)/float64(cnt), correct, cnt))
		log.Info(fmt.Sprintf("[%s] AUC = %f\n", job_name, auc))
	}

	precision := float64(pcorrect) / float64(predicted_pos)
	recall := float64(pcorrect) / float64(pcnt)
	accuracy := float64(correct) / float64(cnt)

	util.Write2File(output_file, fmt.Sprintf(" Log-likelihood = %f\n Precision = %f (%d/%d)\n Recall = %f (%d/%d)\n Accuracy = %f (%d/%d)\n AUC = %f\n",
		avg_loss,
		precision, pcorrect, predicted_pos,
		recall, pcorrect, pcnt,
		accuracy, correct, cnt,
		auc))

	return fmt.Sprintf(returnJson,
		job_name,
		fmt.Sprintf("Log-likelihood = %f", avg_loss),
		fmt.Sprintf("Precision = %f (%d/%d)", precision, pcorrect, predicted_pos),
		fmt.Sprintf("Recall = %f (%d/%d)", recall, pcorrect, pcnt),
		fmt.Sprintf("Accuracy = %f (%d/%d)", accuracy, correct, cnt),
		fmt.Sprintf("AUC = %f", auc),
		output_file), nil
}
示例#14
0
// read_problem_info returns the number of features and the number of samples
// in train_file, in that order.
//
// When read_cache is true and "<train_file>.cache" exists, both numbers are
// read from the first line of that file ("<feat_num> <line_cnt>"). Otherwise,
// or when the cache cannot be read or parsed, train_file is scanned by
// num_threads workers via util.UtilParallelRun: the feature count is the
// largest feature index seen plus one and the sample count is the number of
// samples read. After a scan with read_cache set, the result is written to
// the cache file; a failure to write the cache is logged but not returned.
//
// An error is returned only when train_file has to be scanned and cannot be
// opened.
func read_problem_info(
	train_file string,
	read_cache bool,
	num_threads int) (int, int, error) {

	feat_num := 0
	line_cnt := 0

	log := util.GetLogger()

	var lock sync.Mutex
	var parser FileParser

	// read_from_cache loads feat_num and line_cnt from the first line of
	// path; both are left untouched unless the whole line parses.
	read_from_cache := func(path string) error {
		fs, err := os.Open(path)
		if err != nil {
			return err
		}
		defer fs.Close()

		line, err := bufio.NewReader(fs).ReadString('\n')
		// A final line without a trailing newline is still usable.
		if err != nil && !(err == io.EOF && len(line) > 0) {
			return err
		}

		// Fields drops the trailing newline, which would otherwise make the
		// second number unparsable.
		res := s.Fields(line)
		if len(res) != 2 {
			return errors.New("[read_problem_info] File format error.")
		}

		feats, err := strconv.Atoi(res[0])
		if err != nil {
			return errors.New("[read_problem_info] Label format error." + err.Error())
		}
		lines, err := strconv.Atoi(res[1])
		if err != nil {
			return errors.New("[read_problem_info] Feature format error." + err.Error())
		}

		feat_num, line_cnt = feats, lines
		return nil
	}

	// exist reports whether filename is present according to os.Stat.
	exist := func(filename string) bool {
		_, err := os.Stat(filename)
		return !os.IsNotExist(err)
	}

	// write_to_cache stores "<feat_num> <line_cnt>\n" in filename, replacing
	// any previous content.
	write_to_cache := func(filename string) error {
		f, err := os.Create(filename)
		if err != nil {
			return err
		}
		defer f.Close()

		// strconv.Itoa yields decimal digits; string(int) would yield the
		// character with that code point instead.
		_, err = io.WriteString(f, strconv.Itoa(feat_num)+" "+strconv.Itoa(line_cnt)+"\n")
		return err
	}

	// read_problem_worker drains samples from the shared parser and merges
	// its local statistics into feat_num and line_cnt.
	read_problem_worker := func(i int, c *sync.WaitGroup) {
		// Signal completion even if the body panics.
		defer c.Done()

		local_max_feat := 0
		local_count := 0
		for {
			flag, _, local_x := parser.ReadSampleMultiThread()
			if flag != nil {
				break
			}

			for j := 0; j < len(local_x); j++ {
				if local_x[j].Index+1 > local_max_feat {
					local_max_feat = local_x[j].Index + 1
				}
			}
			local_count++
		}

		// Both shared counters are updated under the same lock.
		lock.Lock()
		line_cnt += local_count
		if local_max_feat > feat_num {
			feat_num = local_max_feat
		}
		lock.Unlock()
	}

	cache_file := string(train_file) + ".cache"
	loaded_from_cache := false
	if read_cache && exist(cache_file) {
		if err := read_from_cache(cache_file); err != nil {
			// An unusable cache is not fatal: fall back to scanning.
			log.Error("[read_problem_info] Read cache error." + err.Error())
		} else {
			loaded_from_cache = true
		}
	}

	if !loaded_from_cache {
		if err := parser.OpenFile(train_file); err != nil {
			log.Error("[read_problem_info] Open file error." + err.Error())
			return 0, 0, errors.New("[read_problem_info] Open file error." + err.Error())
		}
		util.UtilParallelRun(read_problem_worker, num_threads)
		parser.CloseFile()
	}

	log.Info(fmt.Sprintf("[read_problem_info] Instances=[%d] features=[%d]\n", line_cnt, feat_num))

	if read_cache && !loaded_from_cache {
		// The cache is an optimization only; the computed result stays valid
		// when it cannot be written.
		if err := write_to_cache(cache_file); err != nil {
			log.Error("[read_problem_info] Write cache error." + err.Error())
		}
	}

	return feat_num, line_cnt, nil
}