func (data_set *DataSet) calculateCorrelations() (features_correlation [][]float64, answer_correlation []float64) { // prepare data features_correlation = make([][]float64, data_set.FeaturesNum) for i := range features_correlation { features_correlation[i] = make([]float64, data_set.FeaturesNum) } answer_correlation = make([]float64, data_set.FeaturesNum) answers := make([]float64, len(data_set.Classes)) for i := range data_set.Classes { answers[i] = float64(data_set.Classes[i]) } // calculate correlations for i := range data_set.SamplesByFeature { for j := 0; j < i; j++ { corr := math.Abs(stat.Correlation(data_set.SamplesByFeature[i], data_set.SamplesByFeature[j], nil)) features_correlation[i][j] = corr features_correlation[j][i] = corr } } for i := range data_set.SamplesByFeature { answer_correlation[i] = math.Abs(stat.Correlation(data_set.SamplesByFeature[i], answers, nil)) } return }
func cost2(params score.Parameters, trainset []TrainItem) float64 { var totalScore float64 enerchan := make(chan float64, len(trainset)) // rankchan := make(chan float64, len(trainset)) exp := make([]float64, len(trainset)) obs := make([]float64, len(trainset)) enerScore := 0.0 // rankScore := 0.0 for i := 0; i < len(trainset); i++ { go func(i int) { // protein := protein.LoadMol2("./traindata/" + traindata[i].Receptor) protein := trainset[i].Receptor // pos := ligand.LoadMol2("./traindata/" + traindata[i].Positive) pos := trainset[i].Positive total := params.Score(&protein, &pos) // rk := 0.0 // for j := 0; j < len(trainset[i].Negatives); j++ { // // neg := ligand.LoadMol2("./traindata/" + traindata[i].Negatives[j]) // neg := trainset[i].Negatives[j] // negTotal := params.Score(&protein, &neg) // if negTotal <= total { // rk += 1.0 // } // } exp[i] = trainset[i].Energy obs[i] = total enerchan <- ((trainset[i].Energy - total) * (trainset[i].Energy - total)) // rankchan <- rk }(i) } for i := 0; i < len(trainset); i++ { enerScore += <-enerchan // rankScore += <-rankchan } corr := stat.Correlation(exp, obs, nil) // totalScore = enerScore/(corr*corr) + (enerScore / (corr * corr) * rankScore) // totalScore = enerScore*(2000.0-1999.0*corr) + (math.Sqrt(enerScore) * (2000.0 - 1999.0*corr) * rankScore * 1000000.0) totalScore = enerScore * (2000.0 - 1999.0*corr) // fmt.Printf("PKD %f - Rank %f - Corr %f - TOTAL %f\n", enerScore, rankScore, corr, totalScore) fmt.Printf("Energy %f - Corr %f - TOTAL %f\n", math.Sqrt(enerScore/float64(len(trainset))), corr, totalScore) return totalScore }
func cost3(params score.Parameters, trainset []TrainItem) float64 { var totalScore float64 // enerchan := make(chan float64, len(trainset)) rankchan := make(chan float64, len(trainset)) exp := make([]float64, len(trainset)) obs := make([]float64, len(trainset)) // nNegatives := 0.0 // enerScore := 0.0 rankScore := 0.0 for i := 0; i < len(trainset); i++ { go func(i int) { protein := trainset[i].Receptor pos := trainset[i].Positive total := params.Score(&protein, &pos) rk := 0 for j := 0; j < len(trainset[i].Negatives); j++ { neg := trainset[i].Negatives[j] negTotal := params.Score(&protein, &neg) if negTotal <= total { rk += 1 if rk == 5 { break } if rk > 5 { fmt.Println("F**K") } } } exp[i] = trainset[i].Energy obs[i] = total // enerchan <- ((trainset[i].Energy - total) * (trainset[i].Energy - total)) rankchan <- float64(rk) / 5.0 // nNegatives += float64(len(trainset[i].Negatives)) }(i) } // var ranktmp float64 for i := 0; i < len(trainset); i++ { rankScore += <-rankchan } // ranktmp = <-rankchan // if ranktmp > rankScore { // rankScore = ranktmp // } // // enerScore += <-enerchan // // // } rankScore = rankScore / float64(len(trainset)) corr := stat.Correlation(exp, obs, nil) corrSquared := 0.0 if corr > 0.0 { corrSquared = corr * corr } // totalScore = enerScore/(corr*corr) + (enerScore / (corr * corr) * rankScore) // totalScore = enerScore*(2000.0-1999.0*corr) + (math.Sqrt(enerScore) * (2000.0 - 1999.0*corr) * rankScore * 1000000.0) totalScore = (math.Pow(1-corrSquared, 2) + math.Pow(rankScore, 2)) * 1000 // fmt.Printf("PKD %f - Rank %f - Corr %f - TOTAL %f\n", enerScore, rankScore, corr, totalScore) fmt.Printf("Corr %f - Corr^2 %f - Rank %f - TOTAL %f\n", corr, corrSquared, rankScore, totalScore) return totalScore }