示例#1
0
// calculateCorrelations computes absolute correlation coefficients for the
// data set, using stat.Correlation with nil weights.
//
// features_correlation is a FeaturesNum x FeaturesNum matrix. For i != j,
// entries [i][j] and [j][i] both hold |corr(feature i, feature j)|; the
// diagonal is never written and therefore stays 0.
//
// answer_correlation[i] holds |corr(feature i, classes)|, where the class
// labels are converted to float64 before correlating.
func (data_set *DataSet) calculateCorrelations() (features_correlation [][]float64, answer_correlation []float64) {
	size := data_set.FeaturesNum
	samples := data_set.SamplesByFeature

	// Zero-initialised square matrix for the pairwise feature correlations.
	features_correlation = make([][]float64, size)
	for row := range features_correlation {
		features_correlation[row] = make([]float64, size)
	}
	answer_correlation = make([]float64, size)

	// Class labels as floats so they can be correlated with feature columns.
	labels := make([]float64, len(data_set.Classes))
	for k := range data_set.Classes {
		labels[k] = float64(data_set.Classes[k])
	}

	// Only the lower triangle is computed; each value is mirrored into the
	// upper triangle because the measure is symmetric.
	for i := range samples {
		current := samples[i]
		for j := 0; j < i; j++ {
			c := math.Abs(stat.Correlation(current, samples[j], nil))
			features_correlation[i][j], features_correlation[j][i] = c, c
		}
	}

	// Correlation of every feature with the class labels.
	for i := range samples {
		answer_correlation[i] = math.Abs(stat.Correlation(samples[i], labels, nil))
	}

	return features_correlation, answer_correlation
}
示例#2
0
文件: train.go 项目: jgcarvalho/zdd
// cost2 evaluates params against trainset and returns the resulting cost.
//
// For every training item the positive ligand is scored against the item's
// receptor with params.Score. The squared differences between each item's
// reference Energy and its score are summed, and that sum is multiplied by
// (2000 - 1999*corr), where corr is stat.Correlation of the reference
// energies against the scores. A one-line summary (root-mean-square energy
// error, correlation and total) is printed to standard output on every call.
//
// Items are scored concurrently, one goroutine per item. Each worker writes
// only to its own index of exp and obs and then signals completion; the
// squared errors are accumulated afterwards in index order, so the returned
// value does not depend on the order in which the goroutines finish.
func cost2(params score.Parameters, trainset []TrainItem) float64 {
	n := len(trainset)
	exp := make([]float64, n) // reference energies, by item index
	obs := make([]float64, n) // computed scores, by item index

	// Buffered to n so that no worker ever blocks on its completion signal.
	done := make(chan struct{}, n)
	for i := 0; i < n; i++ {
		go func(i int) {
			// Score is handed pointers to local copies rather than to the
			// elements of trainset themselves.
			protein := trainset[i].Receptor
			pos := trainset[i].Positive

			exp[i] = trainset[i].Energy
			obs[i] = params.Score(&protein, &pos)
			done <- struct{}{}
		}(i)
	}

	// Receiving n signals guarantees that every write to exp and obs has
	// completed and is visible here.
	for i := 0; i < n; i++ {
		<-done
	}

	// Sum in index order: floating-point addition is not associative, so
	// summing in completion order would make the result vary between runs.
	enerScore := 0.0
	for i := range exp {
		diff := exp[i] - obs[i]
		enerScore += diff * diff
	}

	corr := stat.Correlation(exp, obs, nil)
	totalScore := enerScore * (2000.0 - 1999.0*corr)

	fmt.Printf("Energy %f - Corr %f - TOTAL %f\n", math.Sqrt(enerScore/float64(len(trainset))), corr, totalScore)
	return totalScore
}
示例#3
0
文件: train.go 项目: jgcarvalho/zdd
// cost3 evaluates params against trainset and returns the resulting cost,
// combining a correlation term and a ranking term:
//
//	((1 - corrSquared)^2 + rankScore^2) * 1000
//
// corrSquared is the square of stat.Correlation between the items' reference
// Energy values and the scores of their positive ligands; it is taken as 0
// when the correlation is not positive. rankScore is the mean, over all
// items, of rank/maxRank, where rank counts the item's negatives whose score
// is less than or equal to the positive's score, capped at maxRank. A
// one-line summary is printed to standard output on every call.
//
// Items are scored concurrently, one goroutine per item. Each worker writes
// only to its own index of the result slices and then signals completion;
// the ranks are accumulated afterwards in index order, so the returned value
// does not depend on the order in which the goroutines finish.
func cost3(params score.Parameters, trainset []TrainItem) float64 {
	// maxRank caps the number of counted negatives per item and is also the
	// divisor that normalises an item's rank.
	const maxRank = 5

	n := len(trainset)
	exp := make([]float64, n)   // reference energies, by item index
	obs := make([]float64, n)   // positive-ligand scores, by item index
	ranks := make([]float64, n) // normalised ranks, by item index

	// Buffered to n so that no worker ever blocks on its completion signal.
	done := make(chan struct{}, n)
	for i := 0; i < n; i++ {
		go func(i int) {
			// Score is handed pointers to local copies rather than to the
			// elements of trainset themselves.
			protein := trainset[i].Receptor
			pos := trainset[i].Positive
			total := params.Score(&protein, &pos)

			rk := 0
			for j := 0; j < len(trainset[i].Negatives); j++ {
				neg := trainset[i].Negatives[j]
				if params.Score(&protein, &neg) <= total {
					rk++
					// Stop at the cap; further negatives cannot change the
					// normalised rank.
					if rk == maxRank {
						break
					}
				}
			}

			exp[i] = trainset[i].Energy
			obs[i] = total
			ranks[i] = float64(rk) / maxRank
			done <- struct{}{}
		}(i)
	}

	// Receiving n signals guarantees that every write to exp, obs and ranks
	// has completed and is visible here.
	for i := 0; i < n; i++ {
		<-done
	}

	// Sum in index order: floating-point addition is not associative, so
	// summing in completion order would make the result vary between runs.
	rankScore := 0.0
	for _, r := range ranks {
		rankScore += r
	}
	rankScore = rankScore / float64(len(trainset))

	corr := stat.Correlation(exp, obs, nil)
	// A non-positive correlation contributes nothing to corrSquared.
	corrSquared := 0.0
	if corr > 0.0 {
		corrSquared = corr * corr
	}

	totalScore := (math.Pow(1-corrSquared, 2) + math.Pow(rankScore, 2)) * 1000
	fmt.Printf("Corr %f - Corr^2 %f - Rank %f - TOTAL %f\n", corr, corrSquared, rankScore, totalScore)
	return totalScore
}